diff options
Diffstat (limited to 'fs')
77 files changed, 1579 insertions, 817 deletions
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d8aa0ae3d037..41c5749f4db7 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) struct super_block *sb = dir->i_sb; struct buffer_head *bh; struct inode *inode = NULL; + struct dentry *res; pr_debug("%s(\"%pd\")\n", __func__, dentry); affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); - affs_unlock_dir(dir); - if (IS_ERR(bh)) + if (IS_ERR(bh)) { + affs_unlock_dir(dir); return ERR_CAST(bh); + } if (bh) { u32 ino = bh->b_blocknr; @@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) } affs_brelse(bh); inode = affs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); } - d_add(dentry, inode); - return NULL; + res = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(res)) + res->d_fsdata = dentry->d_fsdata; + affs_unlock_dir(dir); + return res; } int diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 3bedfed608a2..7587fb665ff1 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, p = text; do { struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; - char tdelim = delim; + const char *q, *stop; if (*p == delim) { p++; @@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, if (*p == '[') { p++; - tdelim = ']'; + q = memchr(p, ']', end - p); + } else { + for (q = p; q < end; q++) + if (*q == '+' || *q == delim) + break; } - if (in4_pton(p, end - p, + if (in4_pton(p, q - p, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - tdelim, &p)) { + -1, &stop)) { srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(p, end - p, + } else if (in6_pton(p, q - p, srx->transport.sin6.sin6_addr.s6_addr, - tdelim, &p)) { + -1, &stop)) { /* Nothing to do */ } else { goto bad_address; } - if (tdelim == ']') { - if (p == end || *p != ']') - goto bad_address; + if (stop != q) + goto bad_address; + + p = q; + if (q < end && *q == ']') p++; - } if (p < end) { if (*p == '+') { diff --git a/fs/afs/callback.c b/fs/afs/callback.c index abd9a84f4e88..571437dcb252 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -23,36 +23,55 @@ /* * Set up an interest-in-callbacks record for a volume on a server and * register it with the server. - * - Called with volume->server_sem held. + * - Called with vnode->io_lock held. */ int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_server_entry *entry) + struct afs_server_list *slist, + unsigned int index) { - struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server_entry *entry = &slist->servers[index]; + struct afs_cb_interest *cbi, *vcbi, *new, *old; struct afs_server *server = entry->server; again: + if (vnode->cb_interest && + likely(vnode->cb_interest == entry->cb_interest)) + return 0; + + read_lock(&slist->lock); + cbi = afs_get_cb_interest(entry->cb_interest); + read_unlock(&slist->lock); + vcbi = vnode->cb_interest; if (vcbi) { - if (vcbi == cbi) + if (vcbi == cbi) { + afs_put_cb_interest(afs_v2net(vnode), cbi); return 0; + } + /* Use a new interest in the server list for the same server + * rather than an old one that's still attached to a vnode. + */ if (cbi && vcbi->server == cbi->server) { write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); + old = vnode->cb_interest; + vnode->cb_interest = cbi; write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), cbi); + afs_put_cb_interest(afs_v2net(vnode), old); return 0; } + /* Re-use the one attached to the vnode. */ if (!cbi && vcbi->server == server) { - afs_get_cb_interest(vcbi); - x = cmpxchg(&entry->cb_interest, cbi, vcbi); - if (x != cbi) { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), vcbi); + write_lock(&slist->lock); + if (entry->cb_interest) { + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); goto again; } + + entry->cb_interest = cbi; + write_unlock(&slist->lock); return 0; } } @@ -72,13 +91,16 @@ again: list_add_tail(&new->cb_link, &server->cb_interests); write_unlock(&server->cb_break_lock); - x = cmpxchg(&entry->cb_interest, cbi, new); - if (x == cbi) { + write_lock(&slist->lock); + if (!entry->cb_interest) { + entry->cb_interest = afs_get_cb_interest(new); cbi = new; + new = NULL; } else { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), new); + cbi = afs_get_cb_interest(entry->cb_interest); } + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), new); } ASSERT(cbi); @@ -88,11 +110,14 @@ again: */ write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); + old = vnode->cb_interest; + vnode->cb_interest = cbi; vnode->cb_s_break = cbi->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old); return 0; } @@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server, if (cbi->vid != fid->vid) continue; - data.volume = NULL; - data.fid = *fid; - inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data); - if (inode) { - vnode = AFS_FS_I(inode); - afs_break_callback(vnode); - iput(inode); + if (fid->vnode == 0 && fid->unique == 0) { + /* The callback break applies to an entire volume. */ + struct afs_super_info *as = AFS_FS_S(cbi->sb); + struct afs_volume *volume = as->volume; + + write_lock(&volume->cb_break_lock); + volume->cb_v_break++; + write_unlock(&volume->cb_break_lock); + } else { + data.volume = NULL; + data.fid = *fid; + inode = ilookup5_nowait(cbi->sb, fid->vnode, + afs_iget5_test, &data); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode); + iput(inode); + } } } @@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count, ASSERT(server != NULL); ASSERTCMP(count, <=, AFSCBMAX); + /* TODO: Sort the callback break list by volume ID */ + for (; count > 0; callbacks++, count--) { _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", callbacks->fid.vid, diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 357de908df3a..c332c95a6940 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call) } /* - * clean up a cache manager call + * Clean up a cache manager call. */ static void afs_cm_destructor(struct afs_call *call) { - _enter(""); - - /* Break the callbacks here so that we do it after the final ACK is - * received. The step number here must match the final number in - * afs_deliver_cb_callback(). - */ - if (call->unmarshall == 5) { - ASSERT(call->cm_server && call->count && call->request); - afs_break_callbacks(call->cm_server, call->count, call->request); - } - kfree(call->buffer); call->buffer = NULL; } @@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work) _enter(""); - /* be sure to send the reply *before* attempting to spam the AFS server - * with FSFetchStatus requests on the vnodes with broken callbacks lest - * the AFS server get into a vicious cycle of trying to break further - * callbacks because it hadn't received completion of the CBCallBack op - * yet */ - afs_send_empty_reply(call); + /* We need to break the callbacks before sending the reply as the + * server holds up change visibility till it receives our reply so as + * to maintain cache coherency. + */ + if (call->cm_server) + afs_break_callbacks(call->cm_server, call->count, call->request); - afs_break_callbacks(call->cm_server, call->count, call->request); + afs_send_empty_reply(call); afs_put_call(call); _leave(""); } @@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; struct sockaddr_rxrpc srx; - struct afs_server *server; __be32 *bp; int ret, loop; @@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->offset = 0; call->unmarshall++; - - /* Record that the message was unmarshalled successfully so - * that the call destructor can know do the callback breaking - * work, even if the final ACK isn't received. - * - * If the step number changes, then afs_cm_destructor() must be - * updated also. - */ - call->unmarshall++; case 5: break; } @@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) + trace_afs_cm_no_server(call, &srx); return afs_queue_call_work(call); } @@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) _enter("{%p}", call->cm_server); - afs_init_callback_state(call->cm_server); + if (call->cm_server) + afs_init_callback_state(call->cm_server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); @@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; - struct afs_server *server; int ret; _enter(""); @@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) + trace_afs_cm_no_server(call, &srx); return afs_queue_call_work(call); } @@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) */ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { - struct sockaddr_rxrpc srx; - struct afs_server *server; struct afs_uuid *r; unsigned loop; __be32 *b; @@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + rcu_read_lock(); + call->cm_server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); + if (!call->cm_server) + trace_afs_cm_no_server_u(call, call->request); return afs_queue_call_work(call); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5889f70d4d27..7d623008157f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file) * get reclaimed during the iteration. */ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) + __acquires(&dvnode->validate_lock) { struct afs_read *req; loff_t i_size; @@ -261,18 +262,21 @@ retry: /* If we're going to reload, we need to lock all the pages to prevent * races. */ - if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - ret = -ERESTARTSYS; - for (i = 0; i < req->nr_pages; i++) - if (lock_page_killable(req->pages[i]) < 0) - goto error_unlock; + ret = -ERESTARTSYS; + if (down_read_killable(&dvnode->validate_lock) < 0) + goto error; - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - goto success; + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + up_read(&dvnode->validate_lock); + if (down_write_killable(&dvnode->validate_lock) < 0) + goto error; + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { ret = afs_fetch_data(dvnode, key, req); if (ret < 0) - goto error_unlock_all; + goto error_unlock; task_io_account_read(PAGE_SIZE * req->nr_pages); @@ -284,33 +288,26 @@ retry: for (i = 0; i < req->nr_pages; i++) if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) - goto error_unlock_all; + goto error_unlock; // TODO: Trim excess pages set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); } + downgrade_write(&dvnode->validate_lock); success: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); return req; -error_unlock_all: - i = req->nr_pages; error_unlock: - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); error: afs_put_read(req); _leave(" = %d", ret); return ERR_PTR(ret); content_has_grown: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); afs_put_read(req); goto retry; } @@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, } out: + up_read(&dvnode->validate_lock); afs_put_read(req); _leave(" = %d", ret); return ret; @@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, true, data_version); } @@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, false, data_version); } @@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir, } while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); } @@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_symlink(&fc, dentry->d_name.name, content, data_version, &newfid, &newstatus); @@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } } while (afs_select_fileserver(&fc)) { - fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; - fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, orig_data_version, new_data_version); diff --git a/fs/afs/file.c b/fs/afs/file.c index c24c08016dd9..7d4f26198573 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_data(&fc, desc); } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 7a0e017070ec..dc62d15a964b 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_set_lock(&fc, type); } @@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_extend_lock(&fc); } @@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_release_lock(&fc); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index efacdb7c1dee..b273e1d60478 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, struct afs_read *read_req) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; + bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; u8 flags = 0; @@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (vnode) write_seqlock(&vnode->cb_lock); + abort_code = ntohl(xdr->abort_code); + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + if (xdr->if_version == htonl(0) && + abort_code != 0 && + inline_error) { + /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus + * whereby it doesn't set the interface version in the error + * case. + */ + status->abort_code = abort_code; + ret = 0; + goto out; + } + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); goto bad; } + if (abort_code != 0 && inline_error) { + status->abort_code = abort_code; + ret = 0; + goto out; + } + type = ntohl(xdr->type); - abort_code = ntohl(xdr->abort_code); switch (type) { case AFS_FTYPE_FILE: case AFS_FTYPE_DIR: @@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, } status->type = type; break; - case AFS_FTYPE_INVALID: - if (abort_code != 0) { - status->abort_code = abort_code; - ret = 0; - goto out; - } - /* Fall through */ default: goto bad; } @@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { + if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 06194cfe9724..479b7fdda124 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_file_status(&fc, NULL, new_inode); } @@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) read_seqlock_excl(&vnode->cb_lock); if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { + if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break || + vnode->cb_v_break != vnode->volume->cb_v_break) { vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; + valid = false; } else if (vnode->status.type == AFS_FTYPE_DIR && test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; @@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (valid) goto valid; - mutex_lock(&vnode->validate_lock); + down_write(&vnode->validate_lock); /* if the promise has expired, we need to check the server again to get * a new promise - note that if the (parent) directory's metadata was @@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); valid: _leave(" = 0"); return 0; error_unlock: - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); _leave(" = %d", ret); return ret; } @@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_setattr(&fc, attr); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f8086ec95e24..e3f8a46663db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -396,6 +396,7 @@ struct afs_server { #define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ #define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ atomic_t usage; u32 addr_version; /* Address list version */ @@ -433,6 +434,7 @@ struct afs_server_list { unsigned short index; /* Server currently in use */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ + rwlock_t lock; struct afs_server_entry servers[]; }; @@ -459,6 +461,9 @@ struct afs_volume { rwlock_t servers_lock; /* Lock for ->servers */ unsigned int servers_seq; /* Incremented each time ->servers changes */ + unsigned cb_v_break; /* Break-everything counter. */ + rwlock_t cb_break_lock; + afs_voltype_t type; /* type of volume */ short error; char type_force; /* force volume type (suppress R/O -> R/W) */ @@ -494,7 +499,7 @@ struct afs_vnode { #endif struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ - struct mutex validate_lock; /* lock for validating this vnode */ + struct rw_semaphore validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; @@ -519,6 +524,7 @@ struct afs_vnode { /* outstanding callback notification on this file */ struct afs_cb_interest *cb_interest; /* Server on which this resides */ unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ @@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); -extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); +extern int afs_register_server_cb_interest(struct afs_vnode *, + struct afs_server_list *, unsigned int); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) { - refcount_inc(&cbi->usage); + if (cbi) + refcount_inc(&cbi->usage); return cbi; } +static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) +{ + return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; +} + +static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode, + struct afs_cb_interest *cbi) +{ + return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break; +} + /* * cell.c */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ac0feac9d746..e065bc0768e6 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (fc->flags & AFS_FS_CURSOR_VNOVOL) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } write_lock(&vnode->volume->servers_lock); @@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (vnode->volume->servers == fc->server_list) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } /* Try again */ @@ -350,8 +350,8 @@ use_server: * break request before we've finished decoding the reply and * installing the vnode. */ - fc->ac.error = afs_register_server_cb_interest( - vnode, &fc->server_list->servers[fc->index]); + fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, + fc->index); if (fc->ac.error < 0) goto failed; @@ -369,8 +369,16 @@ use_server: if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { fc->ac.alist = afs_get_addrlist(alist); - if (!afs_probe_fileserver(fc)) - goto failed; + if (!afs_probe_fileserver(fc)) { + switch (fc->ac.error) { + case -ENOMEM: + case -ERESTARTSYS: + case -EINTR: + goto failed; + default: + goto next_server; + } + } } if (!fc->ac.alist) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5c6263972ec9..08735948f15d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; + unsigned int min_level; int ret; _enter(""); @@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); + min_level = RXRPC_SECURITY_ENCRYPT; + ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, + (void *)&min_level, sizeof(min_level)); + if (ret < 0) + goto error_2; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; @@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call) state = READ_ONCE(call->state); switch (ret) { case 0: - if (state == AFS_CALL_CL_PROC_REPLY) + if (state == AFS_CALL_CL_PROC_REPLY) { + if (call->cbi) + set_bit(AFS_SERVER_FL_MAY_HAVE_CB, + &call->cbi->server->flags); goto call_complete; + } ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: @@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call) case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); goto done; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KNC"); - goto local_abort; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, diff --git a/fs/afs/security.c b/fs/afs/security.c index cea2fff313dc..1992b0ffa543 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (cb_break != (vnode->cb_break + - vnode->cb_interest->server->cb_s_break)) { + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) { changed = true; break; } @@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -257,7 +256,7 @@ found: spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else diff --git a/fs/afs/server.c b/fs/afs/server.c index 629c74986cff..3af4625e2f8c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net, sizeof(struct in6_addr)); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == alist->nr_ipv4) - // goto not_found; - break; - } } } } else { @@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net, (u32 __force)b->sin6_addr.s6_addr32[3]); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == 0) - // goto not_found; - break; - } } } } - //not_found: server = NULL; found: if (server && !atomic_inc_not_zero(&server->usage)) @@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, - .addr = &alist->addrs[0], .start = alist->index, - .index = alist->index, + .index = 0, + .addr = &alist->addrs[alist->index], .error = 0, }; _enter("%p", server); - afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 0f8dc4c8f07c..8a5760aa5832 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error; refcount_set(&slist->usage, 1); + rwlock_init(&slist->lock); /* Make sure a records exists for each server in the list. */ for (i = 0; i < vldb->nr_servers; i++) { @@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error_2; } - /* Insertion-sort by server pointer */ + /* Insertion-sort by UUID */ for (j = 0; j < slist->nr_servers; j++) - if (slist->servers[j].server >= server) + if (memcmp(&slist->servers[j].server->uuid, + &server->uuid, + sizeof(server->uuid)) >= 0) break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { diff --git a/fs/afs/super.c b/fs/afs/super.c index 65081ec3c36e..9e5d7966621c 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); - mutex_init(&vnode->validate_lock); + init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); @@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (afs_begin_vnode_operation(&fc, vnode, key)) { fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_get_volume_status(&fc, &vs); } diff --git a/fs/afs/write.c b/fs/afs/write.c index c164698dc304..8b39e6ebb40b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -351,7 +351,7 @@ found_key: ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_store_data(&fc, mapping, first, last, offset, to); } @@ -1078,8 +1078,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { - percpu_ref_get(&ctx->users); - ret = ctx; + if (percpu_ref_tryget_live(&ctx->users)) + ret = ctx; } out: rcu_read_unlock(); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index af2832aaeec5..4700b4534439 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) if (ret == BEFS_BT_NOT_FOUND) { befs_debug(sb, "<--- %s %pd not found", __func__, dentry); - d_add(dentry, NULL); - return ERR_PTR(-ENOENT); - + inode = NULL; } else if (ret != BEFS_OK || offset == 0) { befs_error(sb, "<--- %s Error", __func__); - return ERR_PTR(-ENODATA); + inode = ERR_PTR(-ENODATA); + } else { + inode = befs_iget(dir->i_sb, (ino_t) offset); } - - inode = befs_iget(dir->i_sb, (ino_t) offset); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - d_add(dentry, inode); - befs_debug(sb, "<--- %s", __func__); - return NULL; + return d_splice_alias(inode, dentry); } static int diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fd44835b386..8c68961925b1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); - btrfs_release_path(p); - ret = -EAGAIN; - tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1, + tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1, &first_key); if (!IS_ERR(tmp)) { /* @@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, } else { ret = PTR_ERR(tmp); } + + btrfs_release_path(p); return ret; } @@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root, down_read(&fs_info->commit_root_sem); left_level = btrfs_header_level(left_root->commit_root); left_root_level = left_level; - left_path->nodes[left_level] = left_root->commit_root; + left_path->nodes[left_level] = + btrfs_clone_extent_buffer(left_root->commit_root); + if (!left_path->nodes[left_level]) { + up_read(&fs_info->commit_root_sem); + ret = -ENOMEM; + goto out; + } extent_buffer_get(left_path->nodes[left_level]); right_level = btrfs_header_level(right_root->commit_root); right_root_level = right_level; - right_path->nodes[right_level] = right_root->commit_root; + right_path->nodes[right_level] = + btrfs_clone_extent_buffer(right_root->commit_root); + if (!right_path->nodes[right_level]) { + up_read(&fs_info->commit_root_sem); + ret = -ENOMEM; + goto out; + } extent_buffer_get(right_path->nodes[right_level]); up_read(&fs_info->commit_root_sem); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2771cc56a622..0d422c9908b8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3182,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); +void __btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode); struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60caa68c3618..c3504b4d281b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3818,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); btrfs_free_qgroup_config(fs_info); + ASSERT(list_empty(&fs_info->delalloc_roots)); if (percpu_counter_sum(&fs_info->delalloc_bytes)) { btrfs_info(fs_info, "at unmount delalloc count %lld", @@ -4125,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) { + /* cleanup FS via transaction */ + btrfs_cleanup_transaction(fs_info); + mutex_lock(&fs_info->cleaner_mutex); btrfs_run_delayed_iputs(fs_info); mutex_unlock(&fs_info->cleaner_mutex); down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); - - /* cleanup FS via transaction */ - btrfs_cleanup_transaction(fs_info); } static void btrfs_destroy_ordered_extents(struct btrfs_root *root) @@ -4258,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) list_splice_init(&root->delalloc_inodes, &splice); while (!list_empty(&splice)) { + struct inode *inode = NULL; btrfs_inode = list_first_entry(&splice, struct btrfs_inode, delalloc_inodes); - - list_del_init(&btrfs_inode->delalloc_inodes); - clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &btrfs_inode->runtime_flags); + __btrfs_del_delalloc_inode(root, btrfs_inode); spin_unlock(&root->delalloc_lock); - btrfs_invalidate_inodes(btrfs_inode->root); - + /* + * Make sure we get a live inode and that it'll not disappear + * meanwhile. + */ + inode = igrab(&btrfs_inode->vfs_inode); + if (inode) { + invalidate_inode_pages2(inode->i_mapping); + iput(inode); + } spin_lock(&root->delalloc_lock); } - spin_unlock(&root->delalloc_lock); } @@ -4286,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, delalloc_root); - list_del_init(&root->delalloc_root); root = btrfs_grab_fs_root(root); BUG_ON(!root); spin_unlock(&fs_info->delalloc_root_lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d241285a0d2a..0b86cf10cf2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, spin_unlock(&root->delalloc_lock); } -static void btrfs_del_delalloc_inode(struct btrfs_root *root, - struct btrfs_inode *inode) + +void __btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); - spin_lock(&root->delalloc_lock); if (!list_empty(&inode->delalloc_inodes)) { list_del_init(&inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, @@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, spin_unlock(&fs_info->delalloc_root_lock); } } +} + +static void btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode) +{ + spin_lock(&root->delalloc_lock); + __btrfs_del_delalloc_inode(root, inode); spin_unlock(&root->delalloc_lock); } @@ -6579,8 +6586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } else { btrfs_update_inode(trans, root, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); } out_unlock: @@ -6656,8 +6662,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); @@ -6802,12 +6807,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - d_instantiate(dentry, inode); - /* - * mkdir is special. We're unlocking after we call d_instantiate - * to avoid a race with nfsd calling d_instantiate. - */ - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: @@ -9117,7 +9117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) BTRFS_EXTENT_DATA_KEY); trans->block_rsv = &fs_info->trans_block_rsv; if (ret != -ENOSPC && ret != -EAGAIN) { - err = ret; + if (ret < 0) + err = ret; break; } @@ -10250,8 +10251,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 53a8c95828e3..dc6140013ae8 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode, const char *value, size_t len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int type; if (len == 0) { @@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode, return 0; } - if (!strncmp("lzo", value, 3)) + if (!strncmp("lzo", value, 3)) { type = BTRFS_COMPRESS_LZO; - else if (!strncmp("zlib", value, 4)) + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); + } else if (!strncmp("zlib", value, 4)) { type = BTRFS_COMPRESS_ZLIB; - else if (!strncmp("zstd", value, len)) + } else if (!strncmp("zstd", value, len)) { type = BTRFS_COMPRESS_ZSTD; - else + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + } else { return -EINVAL; + } BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 43758e30aa7a..8f23a94dab77 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans, return ret; } +/* + * Log all prealloc extents beyond the inode's i_size to make sure we do not + * lose them after doing a fast fsync and replaying the log. We scan the + * subvolume's root instead of iterating the inode's extent map tree because + * otherwise we can log incorrect extent items based on extent map conversion. + * That can happen due to the fact that extent maps are merged when they + * are not in the extent map tree's list of modified extents. + */ +static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key; + const u64 i_size = i_size_read(&inode->vfs_inode); + const u64 ino = btrfs_ino(inode); + struct btrfs_path *dst_path = NULL; + u64 last_extent = (u64)-1; + int ins_nr = 0; + int start_slot; + int ret; + + if (!(inode->flags & BTRFS_INODE_PREALLOC)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = i_size; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(leaf)) { + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + if (ret < 0) + goto out; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY || + key.offset < i_size) { + path->slots[0]++; + continue; + } + if (last_extent == (u64)-1) { + last_extent = key.offset; + /* + * Avoid logging extent items logged in past fsync calls + * and leading to duplicate keys in the log tree. + */ + do { + ret = btrfs_truncate_inode_items(trans, + root->log_root, + &inode->vfs_inode, + i_size, + BTRFS_EXTENT_DATA_KEY); + } while (ret == -EAGAIN); + if (ret) + goto out; + } + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + if (!dst_path) { + dst_path = btrfs_alloc_path(); + if (!dst_path) { + ret = -ENOMEM; + goto out; + } + } + } + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, &last_extent, + start_slot, ins_nr, 1, 0); + if (ret > 0) + ret = 0; + } +out: + btrfs_release_path(path); + btrfs_free_path(dst_path); + return ret; +} + static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, @@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; + /* We log prealloc extents beyond eof later. */ + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && + em->start >= i_size_read(&inode->vfs_inode)) + continue; + if (em->start < logged_start) logged_start = em->start; if ((em->start + em->len - 1) > logged_end) @@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, num++; } - /* - * Add all prealloc extents beyond the inode's i_size to make sure we - * don't lose them after doing a fast fsync and replaying the log. - */ - if (inode->flags & BTRFS_INODE_PREALLOC) { - struct rb_node *node; - - for (node = rb_last(&tree->map); node; node = rb_prev(node)) { - em = rb_entry(node, struct extent_map, rb_node); - if (em->start < i_size_read(&inode->vfs_inode)) - break; - if (!list_empty(&em->list)) - continue; - /* Same as above loop. */ - if (++num > 32768) { - list_del_init(&tree->modified_extents); - ret = -EFBIG; - goto process; - } - refcount_inc(&em->refs); - set_bit(EXTENT_FLAG_LOGGING, &em->flags); - list_add_tail(&em->list, &extents); - } - } - list_sort(NULL, &extents, extent_cmp); btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* @@ -4443,6 +4527,9 @@ process: up_write(&inode->dio_sem); btrfs_release_path(path); + if (!ret) + ret = btrfs_log_prealloc_extents(trans, inode, path); + return ret; } @@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct extent_map_tree *em_tree = &inode->extent_tree; u64 logged_isize = 0; bool need_log_inode_item = true; + bool xattrs_logged = false; path = btrfs_alloc_path(); if (!path) @@ -5128,6 +5216,7 @@ next_key: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + xattrs_logged = true; if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); @@ -5140,6 +5229,11 @@ log_extents: btrfs_release_path(dst_path); if (need_log_inode_item) { err = log_inode_item(trans, log, dst_path, inode); + if (!err && !xattrs_logged) { + err = btrfs_log_all_xattrs(trans, root, inode, path, + dst_path); + btrfs_release_path(path); + } if (err) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 292266f6ab9c..be3fc701f389 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } + /* + * A ro->rw remount sequence should continue with the paused balance + * regardless of who pauses it, system or the user as of now, so set + * the resume flag. + */ + spin_lock(&fs_info->balance_lock); + fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME; + spin_unlock(&fs_info->balance_lock); + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); return PTR_ERR_OR_ZERO(tsk); } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0daa1e3fe0df..ab0bbe93b398 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -572,6 +572,11 @@ lookup_again: if (ret < 0) goto create_error; + if (unlikely(d_unhashed(next))) { + dput(next); + inode_unlock(d_inode(dir)); + goto lookup_again; + } ASSERT(d_backing_inode(next)); _debug("mkdir -> %p{%p{ino=%lu}}", @@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock(d_inode(dir)); +retry: start = jiffies; subdir = lookup_one_len(dirname, dir, strlen(dirname)); cachefiles_hist(cachefiles_lookup_histogram, start); @@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, if (ret < 0) goto mkdir_error; + if (unlikely(d_unhashed(subdir))) { + dput(subdir); + goto retry; + } ASSERT(d_backing_inode(subdir)); _debug("mkdir -> %p{%p{ino=%lu}}", diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f85040d73e3d..cf0e45b10121 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags) */ /* - * Calculate the length sum of direct io vectors that can - * be combined into one page vector. + * How many pages to get in one call to iov_iter_get_pages(). This + * determines the size of the on-stack array used as a buffer. */ -static size_t dio_get_pagev_size(const struct iov_iter *it) +#define ITER_GET_BVECS_PAGES 64 + +static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize, + struct bio_vec *bvecs) { - const struct iovec *iov = it->iov; - const struct iovec *iovend = iov + it->nr_segs; - size_t size; - - size = iov->iov_len - it->iov_offset; - /* - * An iov can be page vectored when both the current tail - * and the next base are page aligned. - */ - while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && - (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { - size += iov->iov_len; - } - dout("dio_get_pagevlen len = %zu\n", size); - return size; + size_t size = 0; + int bvec_idx = 0; + + if (maxsize > iov_iter_count(iter)) + maxsize = iov_iter_count(iter); + + while (size < maxsize) { + struct page *pages[ITER_GET_BVECS_PAGES]; + ssize_t bytes; + size_t start; + int idx = 0; + + bytes = iov_iter_get_pages(iter, pages, maxsize - size, + ITER_GET_BVECS_PAGES, &start); + if (bytes < 0) + return size ?: bytes; + + iov_iter_advance(iter, bytes); + size += bytes; + + for ( ; bytes; idx++, bvec_idx++) { + struct bio_vec bv = { + .bv_page = pages[idx], + .bv_len = min_t(int, bytes, PAGE_SIZE - start), + .bv_offset = start, + }; + + bvecs[bvec_idx] = bv; + bytes -= bv.bv_len; + start = 0; + } + } + + return size; } /* - * Allocate a page vector based on (@it, @nbytes). - * The return value is the tuple describing a page vector, - * that is (@pages, @page_align, @num_pages). + * iov_iter_get_pages() only considers one iov_iter segment, no matter + * what maxsize or maxpages are given. For ITER_BVEC that is a single + * page. + * + * Attempt to get up to @maxsize bytes worth of pages from @iter. + * Return the number of bytes in the created bio_vec array, or an error. */ -static struct page ** -dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, - size_t *page_align, int *num_pages) +static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize, + struct bio_vec **bvecs, int *num_bvecs) { - struct iov_iter tmp_it = *it; - size_t align; - struct page **pages; - int ret = 0, idx, npages; + struct bio_vec *bv; + size_t orig_count = iov_iter_count(iter); + ssize_t bytes; + int npages; - align = (unsigned long)(it->iov->iov_base + it->iov_offset) & - (PAGE_SIZE - 1); - npages = calc_pages_for(align, nbytes); - pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL); - if (!pages) - return ERR_PTR(-ENOMEM); + iov_iter_truncate(iter, maxsize); + npages = iov_iter_npages(iter, INT_MAX); + iov_iter_reexpand(iter, orig_count); - for (idx = 0; idx < npages; ) { - size_t start; - ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes, - npages - idx, &start); - if (ret < 0) - goto fail; + /* + * __iter_get_bvecs() may populate only part of the array -- zero it + * out. + */ + bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO); + if (!bv) + return -ENOMEM; - iov_iter_advance(&tmp_it, ret); - nbytes -= ret; - idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE; + bytes = __iter_get_bvecs(iter, maxsize, bv); + if (bytes < 0) { + /* + * No pages were pinned -- just free the array. + */ + kvfree(bv); + return bytes; } - BUG_ON(nbytes != 0); - *num_pages = npages; - *page_align = align; - dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align); - return pages; -fail: - ceph_put_page_vector(pages, idx, false); - return ERR_PTR(ret); + *bvecs = bv; + *num_bvecs = npages; + return bytes; +} + +static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty) +{ + int i; + + for (i = 0; i < num_bvecs; i++) { + if (bvecs[i].bv_page) { + if (should_dirty) + set_page_dirty_lock(bvecs[i].bv_page); + put_page(bvecs[i].bv_page); + } + } + kvfree(bvecs); } /* @@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); - int num_pages = calc_pages_for((u64)osd_data->alignment, - osd_data->length); - dout("ceph_aio_complete_req %p rc %d bytes %llu\n", - inode, rc, osd_data->length); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); + BUG_ON(!osd_data->num_bvecs); + + dout("ceph_aio_complete_req %p rc %d bytes %u\n", + inode, rc, osd_data->bvec_pos.iter.bi_size); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) } else if (!aio_req->write) { if (rc == -ENOENT) rc = 0; - if (rc >= 0 && osd_data->length > rc) { - int zoff = osd_data->alignment + rc; - int zlen = osd_data->length - rc; + if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) { + struct iov_iter i; + int zlen = osd_data->bvec_pos.iter.bi_size - rc; + /* * If read is satisfied by single OSD request, * it can pass EOF. Otherwise read is within @@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) aio_req->total_len = rc + zlen; } - if (zlen > 0) - ceph_zero_page_vector_range(zoff, zlen, - osd_data->pages); + iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs, + osd_data->num_bvecs, + osd_data->bvec_pos.iter.bi_size); + iov_iter_advance(&i, rc); + iov_iter_zero(zlen, &i); } } - ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty); + put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs, + aio_req->should_dirty); ceph_osdc_put_request(req); if (rc < 0) @@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_vino vino; struct ceph_osd_request *req; - struct page **pages; + struct bio_vec *bvecs; struct ceph_aio_request *aio_req = NULL; int num_pages = 0; int flags; @@ -914,10 +954,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, } while (iov_iter_count(iter) > 0) { - u64 size = dio_get_pagev_size(iter); - size_t start = 0; + u64 size = iov_iter_count(iter); ssize_t len; + if (write) + size = min_t(u64, size, fsc->mount_options->wsize); + else + size = min_t(u64, size, fsc->mount_options->rsize); + vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &size, 0, @@ -933,18 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, break; } - if (write) - size = min_t(u64, size, fsc->mount_options->wsize); - else - size = min_t(u64, size, fsc->mount_options->rsize); - - len = size; - pages = dio_get_pages_alloc(iter, len, &start, &num_pages); - if (IS_ERR(pages)) { + len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages); + if (len < 0) { ceph_osdc_put_request(req); - ret = PTR_ERR(pages); + ret = len; break; } + if (len != size) + osd_req_op_extent_update(req, 0, len); /* * To simplify error handling, allow AIO when IO within i_size @@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, req->r_mtime = mtime; } - osd_req_op_extent_osd_data_pages(req, 0, pages, len, start, - false, false); + osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); if (aio_req) { aio_req->total_len += len; @@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs); pos += len; - iov_iter_advance(iter, len); continue; } @@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (ret == -ENOENT) ret = 0; if (ret >= 0 && ret < len && pos + ret < size) { + struct iov_iter i; int zlen = min_t(size_t, len - ret, size - pos - ret); - ceph_zero_page_vector_range(start + ret, zlen, - pages); + + iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages, + len); + iov_iter_advance(&i, ret); + iov_iter_zero(zlen, &i); ret += zlen; } if (ret >= 0) len = ret; } - ceph_put_page_vector(pages, num_pages, should_dirty); - + put_bvecs(bvecs, num_pages, should_dirty); ceph_osdc_put_request(req); if (ret < 0) break; pos += len; - iov_iter_advance(iter, len); - if (!write && pos >= size) break; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 5f132d59dfc2..d61e2de8d0eb 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -197,7 +197,7 @@ config CIFS_SMB311 config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" - depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y + depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y help Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f715609b13f3..5a5a0158cc8f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1047,6 +1047,18 @@ out: return rc; } +/* + * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync() + * is a dummy operation. + */ +static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n", + file, datasync); + + return 0; +} + static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, size_t len, unsigned int flags) @@ -1181,6 +1193,7 @@ const struct file_operations cifs_dir_ops = { .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, + .fsync = cifs_dir_fsync, }; static void diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a5aa158d535a..7a10a5d0731f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1977,14 +1977,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } -#ifdef CONFIG_CIFS_SMB_DIRECT - if (vol->rdma && vol->sign) { - cifs_dbg(VFS, "Currently SMB direct doesn't support signing." - " This is being fixed\n"); - goto cifs_parse_mount_err; - } -#endif - #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b76b85881dcc..9c6d95ffca97 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -589,9 +589,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + /* + * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's + * not an error. Otherwise, the specified ea_name was not found. + */ if (!rc) rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data, SMB2_MAX_EA_BUF, ea_name); + else if (!ea_name && rc == -ENODATA) + rc = 0; kfree(smb2_data); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 60db51bae0e3..0f48741a0130 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -730,19 +730,14 @@ neg_exit: int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { - int rc = 0; - struct validate_negotiate_info_req vneg_inbuf; + int rc; + struct validate_negotiate_info_req *pneg_inbuf; struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ cifs_dbg(FYI, "validate negotiate\n"); -#ifdef CONFIG_CIFS_SMB_DIRECT - if (tcon->ses->server->rdma) - return 0; -#endif - /* In SMB3.11 preauth integrity supersedes validate negotiate */ if (tcon->ses->server->dialect == SMB311_PROT_ID) return 0; @@ -765,63 +760,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); - vneg_inbuf.Capabilities = + pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS); + if (!pneg_inbuf) + return -ENOMEM; + + pneg_inbuf->Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); - memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, + memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid, SMB2_CLIENT_GUID_SIZE); if (tcon->ses->sign) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); else if (global_secflags & CIFSSEC_MAY_SIGN) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); else - vneg_inbuf.SecurityMode = 0; + pneg_inbuf->SecurityMode = 0; if (strcmp(tcon->ses->server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(2); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(2); /* structure is big enough for 3 dialects, sending only 2 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 2; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]); } else if (strcmp(tcon->ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(3); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(3); /* structure is big enough for 3 dialects */ - inbuflen = sizeof(struct validate_negotiate_info_req); + inbuflen = sizeof(*pneg_inbuf); } else { /* otherwise specific dialect was requested */ - vneg_inbuf.Dialects[0] = + pneg_inbuf->Dialects[0] = cpu_to_le16(tcon->ses->server->vals->protocol_id); - vneg_inbuf.DialectCount = cpu_to_le16(1); + pneg_inbuf->DialectCount = cpu_to_le16(1); /* structure is big enough for 3 dialects, sending only 1 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 4; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]) * 2; } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), - (char **)&pneg_rsp, &rsplen); + (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen); if (rc != 0) { cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); - return -EIO; + rc = -EIO; + goto out_free_inbuf; } - if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { + rc = -EIO; + if (rsplen != sizeof(*pneg_rsp)) { cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if ((rsplen > CIFSMaxBufSize) - || (rsplen < sizeof(struct validate_negotiate_info_rsp))) - goto err_rsp_free; + if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp)) + goto out_free_rsp; } /* check validate negotiate info response matches what we got earlier */ @@ -838,15 +839,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) goto vneg_out; /* validate negotiate successful */ + rc = 0; cifs_dbg(FYI, "validate negotiate info successful\n"); - kfree(pneg_rsp); - return 0; + goto out_free_rsp; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); -err_rsp_free: +out_free_rsp: kfree(pneg_rsp); - return -EIO; +out_free_inbuf: + kfree(pneg_inbuf); + return rc; } enum securityEnum diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 017b0ab19bc4..124b093d14e5 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); - if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) { + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (sbi && sbi->mtd_point_size) mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); kill_mtd_super(sb); diff --git a/fs/dcache.c b/fs/dcache.c index 86d2de63461e..2acfc69878f5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1899,6 +1899,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode) } EXPORT_SYMBOL(d_instantiate); +/* + * This should be equivalent to d_instantiate() + unlock_new_inode(), + * with lockdep-related part of unlock_new_inode() done before + * anything else. Use that instead of open-coding d_instantiate()/ + * unlock_new_inode() combinations. + */ +void d_instantiate_new(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + BUG_ON(!inode); + lockdep_annotate_inode_mutex_key(inode); + security_d_instantiate(entry, inode); + spin_lock(&inode->i_lock); + __d_instantiate(entry, inode); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL(d_instantiate_new); + /** * d_instantiate_no_diralias - instantiate a non-aliased dentry * @entry: dentry to complete diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 97d17eaeba07..49121e5a8de2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iget_failed(ecryptfs_inode); goto out; } - unlock_new_inode(ecryptfs_inode); - d_instantiate(ecryptfs_dentry, ecryptfs_inode); + d_instantiate_new(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1e01fabef130..71635909df3b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1264,21 +1264,11 @@ do_indirects: static void ext2_truncate_blocks(struct inode *inode, loff_t offset) { - /* - * XXX: it seems like a bug here that we don't allow - * IS_APPEND inode to have blocks-past-i_size trimmed off. - * review and fix this. - * - * Also would be nice to be able to handle IO errors and such, - * but that's probably too much to ask. - */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 55f7caadb093..152453a91877 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out: return err; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b1f21e3a0763..4a09063ce1d2 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } drop_nlink(inode); @@ -2651,8 +2650,7 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d5098efe577c..75e37fd720b2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, err = page_symlink(inode, disk_link.name, disk_link.len); err_out: - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); /* * Let's flush symlink data in order to avoid broken symlink as much as @@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 513c357c734b..a6c0f54c48c3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) return 0; out_put_hidden_dir: + cancel_delayed_work_sync(&sbi->sync_work); iput(sbi->hidden_dir); out_put_root: dput(sb->s_root); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 0a754f38462e..e5a6deb38e1e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b41596d71858..56c3fcbfe80e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out1: diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 26dd9a50f383..ff2716f9322e 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, info->root = root; info->ns = ns; + INIT_LIST_HEAD(&info->node); sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, &init_user_ns, info); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a50d7813e3ea..d561161b7c3e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -322,6 +322,8 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + write_seqcount_begin(&clp->cl_callback_count); + write_seqcount_end(&clp->cl_callback_count); if (args->cbl_recall_type == RETURN_FILE) return initiate_file_draining(clp, args); return initiate_bulk_draining(clp, args); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b9129e2befea..02e97c29af0c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -969,7 +969,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, + fattr, NULL, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 1819d0d0ba4b..91c3737d69df 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -404,6 +404,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, trace_nfs4_set_delegation(inode, type); + spin_lock(&inode->i_lock); + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); out: spin_unlock(&clp->cl_lock); if (delegation != NULL) @@ -483,38 +487,88 @@ out: int nfs_client_return_marked_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; + struct nfs_delegation *prev; struct nfs_server *server; struct inode *inode; + struct inode *place_holder = NULL; + struct nfs_delegation *place_holder_deleg = NULL; int err = 0; restart: + /* + * To avoid quadratic looping we hold a reference + * to an inode place_holder. Each time we restart, we + * list nfs_servers from the server of that inode, and + * delegation in the server from the delegations of that + * inode. + * prev is an RCU-protected pointer to a delegation which + * wasn't marked for return and might be a good choice for + * the next place_holder. + */ rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - list_for_each_entry_rcu(delegation, &server->delegations, - super_list) { - if (!nfs_delegation_need_return(delegation)) + prev = NULL; + if (place_holder) + server = NFS_SERVER(place_holder); + else + server = list_entry_rcu(clp->cl_superblocks.next, + struct nfs_server, client_link); + list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) { + delegation = NULL; + if (place_holder && server == NFS_SERVER(place_holder)) + delegation = rcu_dereference(NFS_I(place_holder)->delegation); + if (!delegation || delegation != place_holder_deleg) + delegation = list_entry_rcu(server->delegations.next, + struct nfs_delegation, super_list); + list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) { + struct inode *to_put = NULL; + + if (!nfs_delegation_need_return(delegation)) { + prev = delegation; continue; + } if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ + + if (prev) { + struct inode *tmp; + + tmp = nfs_delegation_grab_inode(prev); + if (tmp) { + to_put = place_holder; + place_holder = tmp; + place_holder_deleg = prev; + } + } + inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); + if (to_put) + iput(to_put); nfs_sb_deactive(server->super); goto restart; } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); + if (to_put) + iput(to_put); + err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); nfs_sb_deactive(server->super); + cond_resched(); if (!err) goto restart; set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + if (place_holder) + iput(place_holder); return err; } } rcu_read_unlock(); + if (place_holder) + iput(place_holder); return 0; } @@ -887,7 +941,7 @@ restart: &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); @@ -904,6 +958,7 @@ restart: } iput(inode); nfs_sb_deactive(server->super); + cond_resched(); goto restart; } } @@ -995,7 +1050,7 @@ restart: &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); @@ -1020,6 +1075,7 @@ restart: } iput(inode); nfs_sb_deactive(server->super); + cond_resched(); goto restart; } } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 73f8b43d988c..7a9c14426855 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1012,13 +1012,25 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) if (IS_AUTOMOUNT(inode)) return 0; + + if (flags & LOOKUP_OPEN) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + /* A NFSv4 OPEN will revalidate later */ + if (server->caps & NFS_CAP_ATOMIC_OPEN) + goto out; + /* Fallthrough */ + case S_IFDIR: + if (server->flags & NFS_MOUNT_NOCTO) + break; + /* NFS close-to-open cache consistency validation */ + goto out_force; + } + } + /* VFS wants an on-the-wire revalidation */ if (flags & LOOKUP_REVAL) goto out_force; - /* This is an open(2) */ - if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) - goto out_force; out: return (inode->i_nlink == 0) ? -ENOENT : 0; out_force: @@ -1039,13 +1051,15 @@ out_force: * * If LOOKUP_RCU prevents us from performing a full check, return 1 * suggesting a reval is needed. + * + * Note that when creating a new file, or looking up a rename target, + * then it shouldn't be necessary to revalidate a negative dentry. */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, unsigned int flags) { - /* Don't revalidate a negative dentry if we're creating a new file */ - if (flags & LOOKUP_CREATE) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1106,7 +1120,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && + if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { error = nfs_lookup_verify_inode(inode, flags); if (error) { @@ -1270,11 +1284,13 @@ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); /* drop the inode if we're reasonably sure this is the last link */ - if (inode->i_nlink == 1) - clear_nlink(inode); + if (inode->i_nlink > 0) + drop_nlink(inode); + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_OTHER + | NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); } @@ -1335,7 +1351,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nfs_is_exclusive_create(dir, flags)) + if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET) return NULL; res = ERR_PTR(-ENOMEM); @@ -1640,7 +1656,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, + fattr, NULL, NULL); if (error < 0) goto out_error; } @@ -2036,7 +2053,15 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else error = task->tk_status; rpc_put_task(task); - nfs_mark_for_revalidate(old_inode); + /* Ensure the inode attributes are revalidated */ + if (error == 0) { + spin_lock(&old_inode->i_lock); + NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter(); + NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; + spin_unlock(&old_inode->i_lock); + } out: if (rehash) d_rehash(rehash); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index ab5de3246c5c..deecb67638aa 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -102,7 +102,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, } rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; - ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label); + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); if (ret) { dprintk("%s: getattr failed %d\n", __func__, ret); dentry = ERR_PTR(ret); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c75ad982bcfc..3ae038d9c292 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2347,6 +2347,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", .owner = THIS_MODULE, + .flags = PNFS_LAYOUTGET_ON_OPEN, .set_layoutdriver = ff_layout_set_layoutdriver, .alloc_layout_hdr = ff_layout_alloc_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd15d0b57626..73473d9bdfa4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -195,10 +195,16 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = nfs_have_delegated_attributes(inode); + bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + + if (have_delegation) { + if (!(flags & NFS_INO_REVAL_FORCED)) + flags &= ~NFS_INO_INVALID_OTHER; + flags &= ~(NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_SIZE + | NFS_INO_REVAL_PAGECACHE); + } - if (have_delegation) - flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); if (inode->i_mapping->nrpages == 0) flags &= ~NFS_INO_INVALID_DATA; nfsi->cache_validity |= flags; @@ -448,6 +454,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); @@ -534,6 +541,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } + if (nfsi->cache_validity != 0) + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + nfs_setsecurity(inode, fattr, label); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); @@ -667,9 +677,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; - nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME); + if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); + nfs_vmtruncate(inode, attr->ia_size); + } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -679,13 +693,45 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); } - if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); - nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); - nfs_vmtruncate(inode, attr->ia_size); + if (attr->ia_valid & (ATTR_ATIME_SET|ATTR_ATIME)) { + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME + | NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + inode->i_atime = fattr->atime; + else if (attr->ia_valid & ATTR_ATIME_SET) + inode->i_atime = attr->ia_atime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); + + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); + } + if (attr->ia_valid & (ATTR_MTIME_SET|ATTR_MTIME)) { + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode->i_mtime = fattr->mtime; + else if (attr->ia_valid & ATTR_MTIME_SET) + inode->i_mtime = attr->ia_mtime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); } if (fattr->valid) nfs_update_inode(inode, fattr); @@ -1097,7 +1143,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, + label, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, @@ -1349,8 +1396,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; - if (nfs_have_delegated_attributes(inode)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; + /* Has the inode gone and changed behind our back? */ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) return -ESTALE; @@ -1400,7 +1448,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) - nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); + nfs_set_cache_invalid(inode, invalid); nfsi->read_cache_jiffies = fattr->time_start; return 0; @@ -1629,7 +1677,8 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME); + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); return status; @@ -1746,6 +1795,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long save_cache_validity; bool have_writers = nfs_file_has_buffered_writers(nfsi); bool cache_revalidated = true; + bool attr_changed = false; + bool have_delegation; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1780,6 +1831,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) !IS_AUTOMOUNT(inode)) server->fsid = fattr->fsid; + /* Save the delegation state before clearing cache_validity */ + have_delegation = nfs_have_delegated_attributes(inode); + /* * Update the read time so we don't revalidate too often. */ @@ -1802,12 +1856,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ - if (!have_writers) { - invalid |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_DATA + if (!(have_writers || have_delegation)) { + invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Force revalidate of all attributes */ @@ -1817,8 +1868,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, + inode->i_ino); } inode_set_iversion_raw(inode, fattr->change_attr); + attr_changed = true; } } else { nfsi->cache_validity |= save_cache_validity & @@ -1850,13 +1905,14 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (new_isize != cur_isize) { + if (new_isize != cur_isize && !have_delegation) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; + attr_changed = true; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1889,14 +1945,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) newmode |= fattr->mode & S_IALLUGO; inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; + attr_changed = true; } } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1904,15 +1958,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; + attr_changed = true; } } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1920,25 +1972,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; + attr_changed = true; } } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { - invalid |= NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); + attr_changed = true; } } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & @@ -1958,7 +2008,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ - if (invalid & NFS_INO_INVALID_ATTR) { + if (attr_changed) { invalid &= ~NFS_INO_INVALID_ATTR; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); @@ -1984,9 +2034,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || - (save_cache_validity & NFS_INO_REVAL_FORCED)) - nfs_set_cache_invalid(inode, invalid); + nfs_set_cache_invalid(inode, invalid); return 0; out_err: diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index eadf1ab31d16..ec8a9efa268f 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -101,7 +101,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], @@ -414,7 +415,9 @@ out: } static void -nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +nfs3_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } @@ -823,7 +826,8 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } @@ -844,7 +848,8 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) return 0; } -static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 9c374441f660..5f59b6f65a42 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -370,6 +370,10 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: break; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + pnfs_destroy_layout(NFS_I(inode)); + break; case -NFS4ERR_EXPIRED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: @@ -462,7 +466,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, nfs42_layoutstat_release(data); return -EAGAIN; } - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b374f680830c..137e18abb7e7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -212,6 +212,31 @@ struct nfs4_state_recovery_ops { struct rpc_cred *); }; +struct nfs4_opendata { + struct kref kref; + struct nfs_openargs o_arg; + struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; + struct nfs4_string owner_name; + struct nfs4_string group_name; + struct nfs4_label *a_label; + struct nfs_fattr f_attr; + struct nfs4_label *f_label; + struct dentry *dir; + struct dentry *dentry; + struct nfs4_state_owner *owner; + struct nfs4_state *state; + struct iattr attrs; + struct nfs4_layoutget *lgp; + unsigned long timestamp; + bool rpc_done; + bool file_created; + bool is_recover; + bool cancelled; + int rpc_status; +}; + struct nfs4_add_xprt_data { struct nfs_client *clp; struct rpc_cred *cred; @@ -251,7 +276,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); -extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 22dc30a679a0..b6f9d84ba19b 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -343,7 +343,7 @@ static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, int id_len; ssize_t ret; - id_len = snprintf(id_str, sizeof(id_str), "%u", id); + id_len = nfs_map_numeric_to_string(id, id_str, sizeof(id_str)); ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); if (ret < 0) return -EINVAL; @@ -627,7 +627,8 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, if (strcmp(upcall->im_name, im->im_name) != 0) break; /* Note: here we store the NUL terminator too */ - len = sprintf(id_str, "%d", im->im_id) + 1; + len = 1 + nfs_map_numeric_to_string(im->im_id, id_str, + sizeof(id_str)); ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b71757e85066..f413d0c8c837 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -71,6 +71,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_BITMASK_SZ 3 + #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) @@ -86,12 +88,12 @@ | ATTR_MTIME_SET) struct nfs4_opendata; -static int _nfs4_proc_open(struct nfs4_opendata *data); +static void nfs4_layoutget_release(void *calldata); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); +static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_open_context *ctx, struct nfs4_label *ilabel, @@ -274,6 +276,33 @@ const u32 nfs4_fs_locations_bitmap[3] = { | FATTR4_WORD1_MOUNTED_ON_FILEID, }; +static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, + struct inode *inode) +{ + unsigned long cache_validity; + + memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); + if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + return; + + cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + if (!(cache_validity & NFS_INO_REVAL_FORCED)) + cache_validity &= ~(NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_SIZE); + + if (!(cache_validity & NFS_INO_INVALID_SIZE)) + dst[0] &= ~FATTR4_WORD0_SIZE; + + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + dst[0] &= ~FATTR4_WORD0_CHANGE; +} + +static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst, + const __u32 *src, struct inode *inode) +{ + nfs4_bitmap_copy_adjust(dst, src, inode); +} + static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { @@ -407,6 +436,11 @@ static int nfs4_do_handle_exception(struct nfs_server *server, switch(errorcode) { case 0: return 0; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + if (inode != NULL && S_ISREG(inode->i_mode)) + pnfs_destroy_layout(NFS_I(inode)); + break; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: @@ -608,20 +642,16 @@ struct nfs4_call_sync_data { }; void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) + struct nfs4_sequence_res *res, int cache_reply, + int privileged) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; - args->sa_privileged = 0; + args->sa_privileged = privileged; res->sr_slot = NULL; } -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ - args->sa_privileged = 1; -} - static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; @@ -877,6 +907,10 @@ nfs4_sequence_process_interrupted(struct nfs_client *client, #else /* !CONFIG_NFS_V4_1 */ +static void nfs4_layoutget_release(void *calldata) +{ +} + static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); @@ -1035,7 +1069,7 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs4_init_sequence(args, res, cache_reply); + nfs4_init_sequence(args, res, cache_reply, 0); return nfs4_call_sync_sequence(clnt, server, msg, args, res); } @@ -1064,30 +1098,6 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, spin_unlock(&dir->i_lock); } -struct nfs4_opendata { - struct kref kref; - struct nfs_openargs o_arg; - struct nfs_openres o_res; - struct nfs_open_confirmargs c_arg; - struct nfs_open_confirmres c_res; - struct nfs4_string owner_name; - struct nfs4_string group_name; - struct nfs4_label *a_label; - struct nfs_fattr f_attr; - struct nfs4_label *f_label; - struct dentry *dir; - struct dentry *dentry; - struct nfs4_state_owner *owner; - struct nfs4_state *state; - struct iattr attrs; - unsigned long timestamp; - bool rpc_done; - bool file_created; - bool is_recover; - bool cancelled; - int rpc_status; -}; - struct nfs4_open_createattrs { struct nfs4_label *label; struct iattr *sattr; @@ -1268,6 +1278,7 @@ static void nfs4_opendata_free(struct kref *kref) struct nfs4_opendata, kref); struct super_block *sb = p->dentry->d_sb; + nfs4_lgopen_release(p->lgp); nfs_free_seqid(p->o_arg.seqid); nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) @@ -2187,13 +2198,12 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; - nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1, + data->is_recover); kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; data->timestamp = jiffies; - if (data->is_recover) - nfs4_set_sequence_privileged(&data->c_arg.seq_args); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -2327,7 +2337,8 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) +static int nfs4_run_open_task(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); @@ -2350,15 +2361,17 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; data->cancelled = false; data->is_recover = false; - if (isrecover) { - nfs4_set_sequence_privileged(&o_arg->seq_args); + if (!ctx) { + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); data->is_recover = true; + } else { + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); + pnfs_lgopen_prepare(data, ctx); } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -2380,7 +2393,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) struct nfs_openres *o_res = &data->o_res; int status; - status = nfs4_run_open_task(data, 1); + status = nfs4_run_open_task(data, NULL); if (status != 0 || !data->rpc_done) return status; @@ -2441,7 +2454,8 @@ static int nfs4_opendata_access(struct rpc_cred *cred, /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ -static int _nfs4_proc_open(struct nfs4_opendata *data) +static int _nfs4_proc_open(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); @@ -2449,7 +2463,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct nfs_openres *o_res = &data->o_res; int status; - status = nfs4_run_open_task(data, 0); + status = nfs4_run_open_task(data, ctx); if (!data->rpc_done) return status; if (status != 0) { @@ -2480,7 +2494,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, + o_res->f_label, NULL); } return 0; } @@ -2800,11 +2815,11 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - ret = _nfs4_proc_open(opendata); + ret = _nfs4_proc_open(opendata, ctx); if (ret != 0) goto out; - state = nfs4_opendata_to_nfs4_state(opendata); + state = _nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); if (IS_ERR(state)) goto out; @@ -2838,8 +2853,12 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) nfs4_schedule_stateid_recovery(server, state); + else + pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); } + out: + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } @@ -3039,7 +3058,6 @@ static int _nfs4_do_setattr(struct inode *inode, }; struct rpc_cred *delegation_cred = NULL; unsigned long timestamp = jiffies; - fmode_t fmode; bool truncate; int status; @@ -3047,11 +3065,12 @@ static int _nfs4_do_setattr(struct inode *inode, /* Servers should only apply open mode checks for file size changes */ truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false; - fmode = truncate ? FMODE_WRITE : FMODE_READ; + if (!truncate) + goto zero_stateid; - if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { + if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (truncate && ctx != NULL) { + } else if (ctx != NULL) { struct nfs_lock_context *l_ctx; if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; @@ -3063,8 +3082,10 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; - } else + } else { +zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); + } if (delegation_cred) msg.rpc_cred = delegation_cred; @@ -3083,12 +3104,13 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_state *state = ctx ? ctx->state : NULL; struct nfs_setattrargs arg = { .fh = NFS_FH(inode), .iap = sattr, .server = server, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, .label = ilabel, }; struct nfs_setattrres res = { @@ -3103,11 +3125,11 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, }; int err; - arg.bitmask = nfs4_bitmask(server, ilabel); - if (ilabel) - arg.bitmask = nfs4_bitmask(server, olabel); - do { + nfs4_bitmap_copy_adjust_setattr(bitmask, + nfs4_bitmask(server, olabel), + inode); + err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); switch (err) { case -NFS4ERR_OPENMODE: @@ -3393,7 +3415,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1, 0); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -3742,7 +3764,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, if (IS_ERR(label)) return PTR_ERR(label); - error = nfs4_proc_getattr(server, mntfh, fattr, label); + error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); goto err_free_label; @@ -3807,11 +3829,13 @@ out: } static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_getattr_arg args = { .fh = fhandle, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = fattr, @@ -3824,19 +3848,20 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - args.bitmask = nfs4_bitmask(server, label); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_getattr(server, fhandle, fattr, label); + err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); trace_nfs4_getattr(server, fhandle, fattr, err); err = nfs4_handle_exception(server, err, &exception); @@ -4089,7 +4114,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if (!nfs_have_delegated_attributes(inode)) { + if (!nfs4_have_delegation(inode, FMODE_READ)) { res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -4265,15 +4290,16 @@ static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) return err; } -static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +static void nfs4_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - struct inode *inode = d_inode(dentry); res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0); nfs_fattr_init(res->dir_attr); @@ -4319,7 +4345,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, nfs4_inode_return_delegation(new_inode); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; res->server = NFS_SB(old_dentry->d_sb); - nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0); } static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) @@ -4352,11 +4378,12 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_link_arg arg = { .fh = NFS_FH(inode), .dir_fh = NFS_FH(dir), .name = name, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_link_res res = { .server = server, @@ -4378,9 +4405,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = PTR_ERR(res.label); goto out; } - arg.bitmask = nfs4_bitmask(server, res.label); nfs4_inode_make_writeable(inode); + nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { @@ -4895,7 +4922,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, @@ -4979,7 +5006,8 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) } static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { struct nfs_server *server = NFS_SERVER(hdr->inode); @@ -4995,7 +5023,8 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0); + nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5026,7 +5055,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data) return data->commit_done_cb(task, data); } -static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -5034,7 +5064,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } struct nfs4_renewdata { @@ -5391,7 +5422,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl */ spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME; + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); @@ -5972,7 +6004,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, @@ -6247,7 +6279,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 0); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -6411,32 +6443,36 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) case 0: renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), data->timestamp); - if (data->arg.new_lock) { + if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { - rpc_restart_call_prepare(task); + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) break; - } } + if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) - rpc_restart_call_prepare(task); + goto out_done; + } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_done; + break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (!nfs4_stateid_match(&data->arg.open_stateid, + if (nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - rpc_restart_call_prepare(task); - } else if (!nfs4_stateid_match(&data->arg.lock_stateid, + goto out_done; + } else if (nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - rpc_restart_call_prepare(task); + goto out_done; } + if (!data->cancelled) + rpc_restart_call_prepare(task); +out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); } @@ -6509,14 +6545,14 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, + recovery_type > NFS_LOCK_NEW); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; if (recovery_type > NFS_LOCK_NEW) { if (recovery_type == NFS_LOCK_RECLAIM) data->arg.reclaim = NFS_LOCK_RECLAIM; - nfs4_set_sequence_privileged(&data->arg.seq_args); } else data->arg.new_lock = 1; task = rpc_run_task(&task_setup_data); @@ -6911,7 +6947,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); } @@ -7107,8 +7143,7 @@ static int _nfs40_proc_get_locations(struct inode *inode, locations->server = server; locations->nlocations = 0; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); if (status) @@ -7161,8 +7196,7 @@ static int _nfs41_proc_get_locations(struct inode *inode, locations->server = server; locations->nlocations = 0; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); if (status == NFS4_OK && @@ -7249,8 +7283,7 @@ static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) if (res.fh == NULL) return -ENOMEM; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); nfs_free_fhandle(res.fh); @@ -7291,8 +7324,7 @@ static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) if (res.fh == NULL) return -ENOMEM; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); nfs_free_fhandle(res.fh); @@ -8070,8 +8102,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); - nfs4_set_sequence_privileged(&args.la_seq_args); + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1); task = rpc_run_task(&task_setup); if (IS_ERR(task)) @@ -8408,10 +8439,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out_put_clp; - nfs4_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); - if (is_privileged) - nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -8563,8 +8592,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, calldata->clp = clp; calldata->arg.one_fs = 0; - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); - nfs4_set_sequence_privileged(&calldata->arg.seq_args); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0, 1); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -8693,63 +8721,19 @@ out: return status; } -static size_t max_response_pages(struct nfs_server *server) +size_t max_response_pages(struct nfs_server *server) { u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; return nfs_page_array_len(0, max_resp_sz); } -static void nfs4_free_pages(struct page **pages, size_t size) -{ - int i; - - if (!pages) - return; - - for (i = 0; i < size; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); -} - -static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) -{ - struct page **pages; - int i; - - pages = kcalloc(size, sizeof(struct page *), gfp_flags); - if (!pages) { - dprintk("%s: can't alloc array of %zu pages\n", __func__, size); - return NULL; - } - - for (i = 0; i < size; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) { - dprintk("%s: failed to allocate page\n", __func__); - nfs4_free_pages(pages, size); - return NULL; - } - } - - return pages; -} - static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct inode *inode = lgp->args.inode; - struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); nfs4_sequence_free_slot(&lgp->res.seq_res); - nfs4_free_pages(lgp->args.layout.pages, max_pages); - pnfs_put_layout_hdr(NFS_I(inode)->layout); - put_nfs_open_context(lgp->args.ctx); - kfree(calldata); + pnfs_layoutget_free(lgp); dprintk("<-- %s\n", __func__); } @@ -8760,11 +8744,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -8791,16 +8774,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ pnfs_get_layout_hdr(NFS_I(inode)->layout); - lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); - if (!lgp->args.layout.pages) { - nfs4_layoutget_release(lgp); - return ERR_PTR(-ENOMEM); - } - lgp->args.layout.pglen = max_pages * PAGE_SIZE; - - lgp->res.layoutp = &lgp->args.layout; - lgp->res.seq_res.sr_slot = NULL; - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -8927,7 +8901,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) } task_setup_data.flags |= RPC_TASK_ASYNC; } - nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -9074,7 +9048,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) } task_setup_data.flags = RPC_TASK_ASYNC; } - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -9254,8 +9228,7 @@ static int _nfs41_test_stateid(struct nfs_server *server, &rpc_client, &msg); dprintk("NFS call test_stateid %p\n", stateid); - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(rpc_client, server, &msg, &args.seq_args, &res.seq_res); if (status != NFS_OK) { @@ -9347,7 +9320,17 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { .rpc_release = nfs41_free_stateid_release, }; -static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to release + * @cred: credential + * @is_recovery: set to true if this call needs to be privileged + * + * Note: this function is always asynchronous. + */ +static int nfs41_free_stateid(struct nfs_server *server, const nfs4_stateid *stateid, struct rpc_cred *cred, bool privileged) @@ -9363,6 +9346,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, .flags = RPC_TASK_ASYNC, }; struct nfs_free_stateid_data *data; + struct rpc_task *task; nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, &task_setup.rpc_client, &msg); @@ -9370,7 +9354,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, dprintk("NFS call free_stateid %p\n", stateid); data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) - return ERR_PTR(-ENOMEM); + return -ENOMEM; data->server = server; nfs4_stateid_copy(&data->args.stateid, stateid); @@ -9378,31 +9362,8 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); - if (privileged) - nfs4_set_sequence_privileged(&data->args.seq_args); - - return rpc_run_task(&task_setup); -} - -/** - * nfs41_free_stateid - perform a FREE_STATEID operation - * - * @server: server / transport on which to perform the operation - * @stateid: state ID to release - * @cred: credential - * @is_recovery: set to true if this call needs to be privileged - * - * Note: this function is always asynchronous. - */ -static int nfs41_free_stateid(struct nfs_server *server, - const nfs4_stateid *stateid, - struct rpc_cred *cred, - bool is_recovery) -{ - struct rpc_task *task; - - task = _nfs41_free_stateid(server, stateid, cred, is_recovery); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged); + task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); @@ -9539,7 +9500,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -9564,6 +9526,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN | NFS_CAP_ALLOCATE | NFS_CAP_COPY | NFS_CAP_DEALLOCATE diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c10a422efe6f..2bf2eaa08ca7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -77,6 +77,14 @@ const nfs4_stateid invalid_stateid = { .type = NFS4_INVALID_STATEID_TYPE, }; +const nfs4_stateid current_stateid = { + { + /* Funky initialiser keeps older gcc versions happy */ + .data = { 0x0, 0x0, 0x0, 0x1, 0 }, + }, + .type = NFS4_SPECIAL_STATEID_TYPE, +}; + static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9b7392032321..738a7be019d2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -65,7 +65,13 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO +struct compound_hdr; static int nfs4_stat_to_errno(int); +static void encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr); +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -424,6 +430,8 @@ static int nfs4_stat_to_errno(int); #define decode_sequence_maxsz 0 #define encode_layoutreturn_maxsz 0 #define decode_layoutreturn_maxsz 0 +#define encode_layoutget_maxsz 0 +#define decode_layoutget_maxsz 0 #endif /* CONFIG_NFS_V4_1 */ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ @@ -476,14 +484,16 @@ static int nfs4_stat_to_errno(int); encode_open_maxsz + \ encode_access_maxsz + \ encode_getfh_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_layoutget_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_access_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_layoutget_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -497,13 +507,15 @@ static int nfs4_stat_to_errno(int); encode_putfh_maxsz + \ encode_open_maxsz + \ encode_access_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_layoutget_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_access_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_layoutget_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2070,6 +2082,13 @@ encode_layoutreturn(struct xdr_stream *xdr, struct compound_hdr *hdr) { } + +static void +encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr) +{ +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2316,6 +2335,12 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, if (args->access) encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); + if (args->lg_args) { + encode_layoutget(xdr, args->lg_args, &hdr); + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, + args->lg_args->layout.pages, + 0, args->lg_args->layout.pglen); + } encode_nops(&hdr); } @@ -2356,6 +2381,12 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, if (args->access) encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); + if (args->lg_args) { + encode_layoutget(xdr, args->lg_args, &hdr); + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, + args->lg_args->layout.pages, + 0, args->lg_args->layout.pglen); + } encode_nops(&hdr); } @@ -6024,7 +6055,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) - return status; + goto out; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; @@ -6037,7 +6068,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, if (!layout_count) { dprintk("%s: server responded with empty layout array\n", __func__); - return -EINVAL; + status = -EINVAL; + goto out; } p = xdr_inline_decode(xdr, 28); @@ -6062,7 +6094,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, dprintk("NFS: server cheating in layoutget reply: " "layout len %u > recvd %u\n", res->layoutp->len, recvd); - return -EINVAL; + status = -EINVAL; + goto out; } if (layout_count > 1) { @@ -6075,10 +6108,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, __func__, layout_count); } - return 0; +out: + res->status = status; + return status; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + status = -EIO; + goto out; } static int decode_layoutreturn(struct xdr_stream *xdr, @@ -6177,6 +6213,13 @@ int decode_layoutreturn(struct xdr_stream *xdr, { return 0; } + +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res) +{ + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ /* @@ -6623,6 +6666,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); + if (res->lg_res) + decode_layoutget(xdr, rqstp, res->lg_res); out: return status; } @@ -6675,6 +6720,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); + if (res->lg_res) + decode_layoutget(xdr, rqstp, res->lg_res); out: return status; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ee723aa153a3..d93942fa3817 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -37,6 +37,7 @@ #include "nfs4trace.h" #include "delegation.h" #include "nfs42.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -915,45 +916,99 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } -/* - * Get layout from server. - * for now, assume that whole file layouts are requested. - * arg->offset: 0 - * arg->length: all ones - */ -static struct pnfs_layout_segment * -send_layoutget(struct pnfs_layout_hdr *lo, +static struct nfs_server * +pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) +{ + struct nfs_server *server; + + if (inode) { + server = NFS_SERVER(inode); + } else { + struct dentry *parent_dir = dget_parent(ctx->dentry); + server = NFS_SERVER(parent_dir->d_inode); + dput(parent_dir); + } + return server; +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + +static struct nfs4_layoutget * +pnfs_alloc_init_layoutget_args(struct inode *ino, struct nfs_open_context *ctx, - nfs4_stateid *stateid, + const nfs4_stateid *stateid, const struct pnfs_layout_range *range, - long *timeout, gfp_t gfp_flags) + gfp_t gfp_flags) { - struct inode *ino = lo->plh_inode; - struct nfs_server *server = NFS_SERVER(ino); + struct nfs_server *server = pnfs_find_server(ino, ctx); + size_t max_pages = max_response_pages(server); struct nfs4_layoutget *lgp; - loff_t i_size; dprintk("--> %s\n", __func__); - /* - * Synchronously retrieve layout information from server and - * store in lseg. If we race with a concurrent seqid morphing - * op, then re-send the LAYOUTGET. - */ lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) - return ERR_PTR(-ENOMEM); + return NULL; + + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + kfree(lgp); + return NULL; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; - i_size = i_size_read(ino); + /* Don't confuse uninitialised result and success */ + lgp->res.status = -NFS4ERR_DELAY; lgp->args.minlength = PAGE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; + if (ino) { + loff_t i_size = i_size_read(ino); + + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } } lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; pnfs_copy_range(&lgp->args.range, range); @@ -962,9 +1017,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.ctx = get_nfs_open_context(ctx); nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; + lgp->cred = get_rpccred(ctx->cred); + lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count); + return lgp; +} - return nfs4_proc_layoutget(lgp, timeout, gfp_flags); +void pnfs_layoutget_free(struct nfs4_layoutget *lgp) +{ + size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; + + nfs4_free_pages(lgp->args.layout.pages, max_pages); + if (lgp->args.inode) + pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); + put_rpccred(lgp->cred); + put_nfs_open_context(lgp->args.ctx); + kfree(lgp); } static void pnfs_clear_layoutcommit(struct inode *inode, @@ -1671,6 +1738,22 @@ static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); } +static void _add_to_server_list(struct pnfs_layout_hdr *lo, + struct nfs_server *server) +{ + if (list_empty(&lo->plh_layouts)) { + struct nfs_client *clp = server->nfs_client; + + /* The lo must be on the clp list if there is any + * chance of a CB_LAYOUTRECALL(FILE) coming in. + */ + spin_lock(&clp->cl_lock); + if (list_empty(&lo->plh_layouts)) + list_add_tail(&lo->plh_layouts, &server->layouts); + spin_unlock(&clp->cl_lock); + } +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1694,6 +1777,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo = NULL; struct pnfs_layout_segment *lseg = NULL; + struct nfs4_layoutget *lgp; nfs4_stateid stateid; long timeout = 0; unsigned long giveup = jiffies + (clp->cl_lease_time << 1); @@ -1820,15 +1904,7 @@ lookup_again: atomic_inc(&lo->plh_outstanding); spin_unlock(&ino->i_lock); - if (list_empty(&lo->plh_layouts)) { - /* The lo must be on the clp list if there is any - * chance of a CB_LAYOUTRECALL(FILE) coming in. - */ - spin_lock(&clp->cl_lock); - if (list_empty(&lo->plh_layouts)) - list_add_tail(&lo->plh_layouts, &server->layouts); - spin_unlock(&clp->cl_lock); - } + _add_to_server_list(lo, server); pg_offset = arg.offset & ~PAGE_MASK; if (pg_offset) { @@ -1838,7 +1914,15 @@ lookup_again: if (arg.length != NFS4_MAX_UINT64) arg.length = PAGE_ALIGN(arg.length); - lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); + if (!lgp) { + trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, + PNFS_UPDATE_LAYOUT_NOMEM); + atomic_dec(&lo->plh_outstanding); + goto out_put_layout_hdr; + } + + lseg = nfs4_proc_layoutget(lgp, &timeout); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); atomic_dec(&lo->plh_outstanding); @@ -1919,6 +2003,171 @@ pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) return true; } +static struct pnfs_layout_hdr * +_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); + if (!lo) + goto out_unlock; + if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + goto out_unlock; + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + goto out_unlock; + if (pnfs_layoutgets_blocked(lo)) + goto out_unlock; + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags)) + goto out_unlock; + atomic_inc(&lo->plh_outstanding); + spin_unlock(&ino->i_lock); + _add_to_server_list(lo, NFS_SERVER(ino)); + return lo; + +out_unlock: + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); + return NULL; +} + +extern const nfs4_stateid current_stateid; + +static void _lgopen_prepare_attached(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + struct inode *ino = data->dentry->d_inode; + struct pnfs_layout_range rng = { + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? + IOMODE_RW: IOMODE_READ, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + struct nfs4_layoutget *lgp; + struct pnfs_layout_hdr *lo; + + /* Heuristic: don't send layoutget if we have cached data */ + if (rng.iomode == IOMODE_READ && + (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0)) + return; + + lo = _pnfs_grab_empty_layout(ino, ctx); + if (!lo) + return; + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, + &rng, GFP_KERNEL); + if (!lgp) { + pnfs_clear_first_layoutget(lo); + pnfs_put_layout_hdr(lo); + return; + } + data->lgp = lgp; + data->o_arg.lg_args = &lgp->args; + data->o_res.lg_res = &lgp->res; +} + +static void _lgopen_prepare_floating(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + struct pnfs_layout_range rng = { + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? + IOMODE_RW: IOMODE_READ, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + struct nfs4_layoutget *lgp; + + lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, ¤t_stateid, + &rng, GFP_KERNEL); + if (!lgp) + return; + data->lgp = lgp; + data->o_arg.lg_args = &lgp->args; + data->o_res.lg_res = &lgp->res; +} + +void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + + if (!(pnfs_enabled_sb(server) && + server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) + return; + /* Could check on max_ops, but currently hardcoded high enough */ + if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) + return; + if (data->state) + _lgopen_prepare_attached(data, ctx); + else + _lgopen_prepare_floating(data, ctx); +} + +void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx) +{ + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg; + struct nfs_server *srv = NFS_SERVER(ino); + u32 iomode; + + if (!lgp) + return; + dprintk("%s: entered with status %i\n", __func__, lgp->res.status); + if (lgp->res.status) { + switch (lgp->res.status) { + default: + break; + /* + * Halt lgopen attempts if the server doesn't recognise + * the "current stateid" value, the layout type, or the + * layoutget operation as being valid. + * Also if it complains about too many ops in the compound + * or of the request/reply being too big. + */ + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_NOTSUPP: + case -NFS4ERR_REP_TOO_BIG: + case -NFS4ERR_REP_TOO_BIG_TO_CACHE: + case -NFS4ERR_REQ_TOO_BIG: + case -NFS4ERR_TOO_MANY_OPS: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: + srv->caps &= ~NFS_CAP_LGOPEN; + } + return; + } + if (!lgp->args.inode) { + lo = _pnfs_grab_empty_layout(ino, ctx); + if (!lo) + return; + lgp->args.inode = ino; + } else + lo = NFS_I(lgp->args.inode)->layout; + + if (read_seqcount_retry(&srv->nfs_client->cl_callback_count, + lgp->callback_count)) + return; + lseg = pnfs_layout_process(lgp); + if (!IS_ERR(lseg)) { + iomode = lgp->args.range.iomode; + pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + pnfs_put_lseg(lseg); + } +} + +void nfs4_lgopen_release(struct nfs4_layoutget *lgp) +{ + if (lgp != NULL) { + struct inode *inode = lgp->args.inode; + if (inode) { + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; + atomic_dec(&lo->plh_outstanding); + pnfs_clear_first_layoutget(lo); + } + pnfs_layoutget_free(lgp); + } +} + struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { @@ -1984,8 +2233,6 @@ out_forget: spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - if (!pnfs_layout_is_valid(lo)) - nfs_commit_inode(ino, 0); return ERR_PTR(-EAGAIN); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index daf6cbf5c15f..a8f5e6b16749 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -35,6 +35,8 @@ #include <linux/nfs_page.h> #include <linux/workqueue.h> +struct nfs4_opendata; + enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_ROC, /* roc bit received from server */ @@ -110,6 +112,7 @@ enum layoutdriver_policy_flags { PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_ERROR = 1 << 1, PNFS_READ_WHOLE_PAGE = 1 << 2, + PNFS_LAYOUTGET_ON_OPEN = 1 << 3, }; struct nfs4_deviceid_node; @@ -223,10 +226,11 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ +extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ @@ -246,6 +250,7 @@ size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); +void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); @@ -375,6 +380,11 @@ void pnfs_layout_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); +void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx); +void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx); +void nfs4_lgopen_release(struct nfs4_layoutget *lgp); static inline bool nfs_have_layout(struct inode *inode) { @@ -775,6 +785,22 @@ static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, { return false; } + +static inline void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ +} + +static inline void pnfs_parse_lgopen(struct inode *ino, + struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx) +{ +} + +static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4e93d6308733..e0c257bd62b9 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -99,7 +99,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], @@ -321,7 +322,9 @@ nfs_proc_remove(struct inode *dir, struct dentry *dentry) } static void -nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +nfs_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } @@ -618,7 +621,8 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ hdr->args.stable = NFS_FILE_SYNC; @@ -631,7 +635,8 @@ static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit } static void -nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { BUG(); } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index bf54fc9ae135..fd61bf0fce63 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_prepare = nfs_unlink_prepare, }; -static void nfs_do_call_unlink(struct nfs_unlinkdata *data) +static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) { struct rpc_message msg = { .rpc_argp = &data->args, @@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) data->args.fh = NFS_FH(dir); nfs_fattr_init(data->res.dir_attr); - NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry, inode); task_setup_data.rpc_client = NFS_CLIENT(dir); task = rpc_run_task(&task_setup_data); @@ -113,7 +113,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) rpc_put_task_async(task); } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nfs_unlinkdata *data) { struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; @@ -153,7 +153,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) return ret; } data->dentry = alias; - nfs_do_call_unlink(data); + nfs_do_call_unlink(inode, data); return 1; } @@ -231,7 +231,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } @@ -448,6 +448,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) unsigned char silly[SILLYNAME_LEN + 1]; unsigned long long fileid; struct dentry *sdentry; + struct inode *inode = d_inode(dentry); struct rpc_task *task; int error = -EBUSY; @@ -485,6 +486,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) goto out; } while (d_inode(sdentry) != NULL); /* need negative lookup */ + ihold(inode); + /* queue unlink first. Can't do this from rpc_release as it * has to allocate memory */ @@ -509,6 +512,12 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) case 0: /* The rename succeeded */ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + spin_lock(&inode->i_lock); + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); d_move(dentry, sdentry); break; case -ERESTARTSYS: @@ -519,6 +528,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) } rpc_put_task(task); out_dput: + iput(inode); dput(sdentry); out: return error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0193053bc139..a057b4f45a46 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1375,12 +1375,9 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, int priority = flush_task_priority(how); task_setup_data->priority = priority; - rpc_ops->write_setup(hdr, msg); + rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, hdr->args.stable); - - nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, - &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1669,14 +1666,11 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .priority = priority, }; /* Set up the initial task struct. */ - nfs_ops->commit_setup(data, &msg); + nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); trace_nfs_initiate_commit(data); dprintk("NFS: initiated commit call\n"); - nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, - NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2410b093a2e6..b0555d7d8200 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, break; case S_IFDIR: host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + if (!host_err && unlikely(d_unhashed(dchild))) { + struct dentry *d; + d = lookup_one_len(dchild->d_name.name, + dchild->d_parent, + dchild->d_name.len); + if (IS_ERR(d)) { + host_err = PTR_ERR(d); + break; + } + if (unlikely(d_is_negative(d))) { + dput(d); + err = nfserr_serverfault; + goto out; + } + dput(resfhp->fh_dentry); + resfhp->fh_dentry = dget(d); + err = fh_update(resfhp); + dput(dchild); + dchild = d; + if (err) + goto out; + } break; case S_IFCHR: case S_IFBLK: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1a2894aa0194..dd52d3f82e8d 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; nilfs_mark_inode_dirty(inode); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 91a8889abf9b..ea8c551bcd7e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, current_page, vec_len, vec_start); len = bio_add_page(bio, page, vec_len, vec_start); - if (len != vec_len) { - mlog(ML_ERROR, "Adding page[%d] to bio failed, " - "page %p, len %d, vec_len %u, vec_start %u, " - "bi_sector %llu\n", current_page, page, len, - vec_len, vec_start, - (unsigned long long)bio->bi_iter.bi_sector); - bio_put(bio); - bio = ERR_PTR(-EIO); - return bio; - } + if (len != vec_len) break; cs += vec_len / (PAGE_SIZE/spp); vec_start = 0; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 01c6b3894406..7869622af22a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4250,10 +4250,11 @@ out: static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, bool preserve) { - int error; + int error, had_lock; struct inode *inode = d_inode(old_dentry); struct buffer_head *old_bh = NULL; struct inode *new_orphan_inode = NULL; + struct ocfs2_lock_holder oh; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; @@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, goto out; } + had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, + &oh); + if (had_lock < 0) { + error = had_lock; + mlog_errno(error); + goto out; + } + /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { error = ocfs2_init_security_and_acl(dir, new_orphan_inode, @@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, if (error) mlog_errno(error); } -out: if (!error) { error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, new_dentry); if (error) mlog_errno(error); } + ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); +out: if (new_orphan_inode) { /* * We need to open_unlock the inode no matter whether we diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 6e3134e6d98a..1b5707c44c3f 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir, get_khandle_from_ino(inode), dentry); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir, "Assigned symlink inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode "Assigned dir inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; diff --git a/fs/proc/array.c b/fs/proc/array.c index ae2c807fd719..72391b3f6927 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -85,6 +85,7 @@ #include <linux/delayacct.h> #include <linux/seq_file.h> #include <linux/pid_namespace.h> +#include <linux/prctl.h> #include <linux/ptrace.h> #include <linux/tracehook.h> #include <linux/string_helpers.h> @@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_printf(m, "\nSpeculation_Store_Bypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } seq_putc(m, '\n'); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b2ede6abcdf..1a76d751cf3c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -946,7 +946,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d1e82761de81..e64ecb9f2720 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) { struct list_head *head = (struct list_head *)arg; struct kcore_list *ent; + struct page *p; + + if (!pfn_valid(pfn)) + return 1; + + p = pfn_to_page(pfn); + if (!memmap_valid_within(pfn, p, page_zone(p))) + return 1; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->addr = (unsigned long)page_to_virt(p); ent->size = nr_pages << PAGE_SHIFT; - /* Sanity check: Can happen in 32bit arch...maybe */ - if (ent->addr < (unsigned long) __va(0)) + if (!virt_addr_valid(ent->addr)) goto free_out; /* cut not-mapped area. ....from ppc-32 code. */ if (ULONG_MAX - ent->addr < ent->size) ent->size = ULONG_MAX - ent->addr; - /* cut when vmalloc() area is higher than direct-map area */ - if (VMALLOC_START > (unsigned long)__va(0)) { - if (ent->addr > VMALLOC_START) - goto free_out; + /* + * We've already checked virt_addr_valid so we know this address + * is a valid pointer, therefore we can check against it to determine + * if we need to trim + */ + if (VMALLOC_START > ent->addr) { if (VMALLOC_START - ent->addr < ent->size) ent->size = VMALLOC_START - ent->addr; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index bd39a998843d..5089dac02660 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the above add_entry did not update dir's stat data */ reiserfs_update_sd(&th, dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(dir->i_sb); @@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/seq_file.c b/fs/seq_file.c index c6c27f1f9c98..4cc090b50cc5 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, if (m->count + width >= m->size) goto overflow; - if (num < 10) { - m->buf[m->count++] = num + '0'; - return; - } - len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; diff --git a/fs/super.c b/fs/super.c index 122c402049a2..4b5b562176d0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); /* - * Don't call trylock_super as it is a potential - * scalability bottleneck. The counts could get updated - * between super_cache_count and super_cache_scan anyway. - * Call to super_cache_count with shrinker_rwsem held - * ensures the safety of call to list_lru_shrink_count() and - * s_op->nr_cached_objects(). + * We don't call trylock_super() here as it is a scalability bottleneck, + * so we're exposed to partial setup state. The shrinker rwsem does not + * protect filesystem operations backing list_lru_shrink_count() or + * s_op->nr_cached_objects(). Counts can change between + * super_cache_count and super_cache_scan, so we really don't need locks + * here. + * + * However, if we are currently mounting the superblock, the underlying + * filesystem might be in a state of partial construction and hence it + * is dangerous to access it. trylock_super() uses a SB_BORN check to + * avoid this situation, so do the same here. The memory barrier is + * matched with the one in mount_fs() as we don't hold locks here. */ + if (!(sb->s_flags & SB_BORN)) + return 0; + smp_rmb(); + if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); @@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); + + /* + * Write barrier is for super_cache_count(). We place it before setting + * SB_BORN as the data dependency between the two functions is the + * superblock structure contents that we just set up, not the SB_BORN + * flag. + */ + smp_wmb(); sb->s_flags |= SB_BORN; error = security_sb_kern_mount(sb, flags, secdata); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index b428d317ae92..92682fcc41f6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; - bool new_sb; + bool new_sb = false; if (!(flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) @@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); root = kernfs_mount_ns(fs_type, flags, sysfs_root, SYSFS_MAGIC, &new_sb, ns); - if (IS_ERR(root) || !new_sb) + if (!new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (new_sb) + else if (!IS_ERR(root)) root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; return root; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0458dd47e105..c586026508db 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } @@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(dir); dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 32545cd00ceb..d5f43ba76c59 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; out_fail: |