Diffstat (limited to 'fs')
59 files changed, 1276 insertions, 479 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index bf7e3f23bba7..670700cb1110 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1761,6 +1761,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->key.objectid); + btrfs_put_block_group(cache); ret = -EINVAL; goto error; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19d669d12ca1..fe2b8765d9e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -734,8 +734,6 @@ struct btrfs_fs_info { struct btrfs_workqueue *fixup_workers; struct btrfs_workqueue *delayed_workers; - /* the extent workers do delayed refs on the extent allocation tree */ - struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; u32 thread_pool_size; @@ -2489,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, int nitems, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free); +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index d949d7d2abed..db9f2c58eb4a 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -381,7 +381,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) out_qgroup: btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); out_fail: - btrfs_inode_rsv_release(inode, true); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; @@ -418,7 +417,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * btrfs_delalloc_release_extents - release our outstanding_extents * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with - * @qgroup_free: do we need to free qgroup meta reservation or convert them. * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we @@ -426,8 +424,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. 
*/ -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free) +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned num_extents; @@ -441,7 +438,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, if (btrfs_is_testing(fs_info)) return; - btrfs_inode_rsv_release(inode, qgroup_free); + btrfs_inode_rsv_release(inode, true); } /** diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 044981cf6df9..402b61bf345c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2008,7 +2008,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); - btrfs_destroy_workqueue(fs_info->extent_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work @@ -2214,10 +2213,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, max_active, 2); fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); - fs_info->extent_workers = - btrfs_alloc_workqueue(fs_info, "extent-refs", flags, - min_t(u64, fs_devices->num_devices, - max_active), 8); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->submit_workers && fs_info->flush_workers && @@ -2228,7 +2223,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { return -ENOMEM; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 27e5b269e729..435a502a3226 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1692,7 +1692,7 @@ again: force_page_uptodate); if (ret) { btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); break; } @@ -1704,7 +1704,7 @@ again: if (extents_locked == -EAGAIN) goto again; btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); ret = extents_locked; break; } @@ -1772,8 +1772,7 @@ again: else free_extent_state(cached_state); - btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - true); + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { btrfs_drop_pages(pages, num_pages); break; @@ -2068,25 +2067,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; - u64 len; - /* - * If the inode needs a full sync, make sure we use a full range to - * avoid log tree corruption, due to hole detection racing with ordered - * extent completion for adjacent ranges, and assertion failures during - * hole detection. - */ - if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags)) { - start = 0; - end = LLONG_MAX; - } - - /* - * The range length can be represented by u64, we have to do the typecasts - * to avoid signed overflow if it's [0, LLONG_MAX] eg. 
from fsync() - */ - len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); btrfs_init_log_ctx(&ctx, inode); @@ -2113,6 +2094,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. Do this while holding the inode lock, to avoid races + * with other tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for * any such pages does not start while we are logging the inode, because @@ -2139,8 +2133,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* * We have to do this here to avoid the priority inversion of waiting on * IO of a lower priority task while holding a transaction open. + * + * Also, the range length can be represented by u64, we have to do the + * typecasts to avoid signed overflow if it's [0, LLONG_MAX]. */ - ret = btrfs_wait_ordered_range(inode, start, len); + ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1); if (ret) { up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 63cad7865d75..37345fb6191d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -501,13 +501,13 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true); goto out_put; } ret = btrfs_write_out_ino_cache(root, trans, path, inode); - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); out_put: iput(inode); out_release: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0f2754eaa05b..c3f386b7cc0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2206,7 +2206,7 @@ again: ClearPageChecked(page); set_page_dirty(page); - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -4951,7 +4951,7 @@ again: if (!page) { btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); ret = -ENOMEM; goto out; } @@ -5018,7 +5018,7 @@ out_unlock: if (ret) btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); unlock_page(page); put_page(page); out: @@ -8709,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, data_reserved, offset, count - (size_t)ret, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), count, false); + 
btrfs_delalloc_release_extents(BTRFS_I(inode), count); } out: if (wakeup) @@ -9059,7 +9059,7 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state); if (!ret2) { - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; @@ -9068,7 +9068,7 @@ again: out_unlock: unlock_page(page); out: - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); btrfs_delalloc_release_space(inode, data_reserved, page_start, reserved_space, (ret != 0)); out_noreserve: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index de730e56d3f5..7c145a41decd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1360,8 +1360,7 @@ again: unlock_page(pages[i]); put_page(pages[i]); } - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - false); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return i_done; out: @@ -1372,8 +1371,7 @@ out: btrfs_delalloc_release_space(inode, data_reserved, start_index << PAGE_SHIFT, page_cnt << PAGE_SHIFT, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - true); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c4bb69941c77..3ad151655eb8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3629,7 +3629,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, (s64)num_bytes); + trace_qgroup_meta_reserve(root, (s64)num_bytes, type); ret = qgroup_reserve(root, num_bytes, enforce, type); if (ret < 0) return ret; @@ -3676,7 +3676,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, */ num_bytes = sub_root_meta_rsv(root, num_bytes, type); BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, -(s64)num_bytes); + trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, num_bytes, type); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 00504657b602..5cd42b66818c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3277,6 +3277,8 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!page) { btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), + PAGE_SIZE); ret = -ENOMEM; goto out; } @@ -3297,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); ret = -EIO; goto out; } @@ -3326,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, @@ -3342,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode, put_page(page); index++; - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, - false); + 
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); balance_dirty_pages_ratelimited(inode->i_mapping); btrfs_throttle(fs_info); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c049c7b3aa87..1a135d1b85bd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -169,7 +169,13 @@ cifs_read_super(struct super_block *sb) else sb->s_maxbytes = MAX_NON_LFS; - /* Some very old servers like DOS and OS/2 used 2 second granularity */ + /* + * Some very old servers like DOS and OS/2 used 2 second granularity + * (while all current servers use 100ns granularity - see MS-DTYP) + * but 1 second is the maximum allowed granularity for the VFS + * so for old servers set time granularity to 1 second while for + * everything else (current servers) set it to 100ns. + */ if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) && ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) && diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 50dfd9049370..d78bfcc19156 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1391,6 +1391,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ + /* + * NOTE: Some code paths call down_read(lock_sem) twice, so + * we must always use cifs_down_write() instead of down_write() + * for this semaphore to avoid deadlocks. + */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e53e9f62b87b..fe597d3d5208 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -170,6 +170,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void cifs_down_write(struct rw_semaphore *sem); extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a64dfa95a925..ccaa8bad336f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); + kref_get(&mid_entry->refcount); if (mid_entry->mid_state == MID_REQUEST_SUBMITTED) mid_entry->mid_state = MID_RETRY_NEEDED; list_move(&mid_entry->qhead, &retry_list); + mid_entry->mid_flags |= MID_DELETED; } spin_unlock(&GlobalMid_Lock); mutex_unlock(&server->srv_mutex); @@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); + cifs_mid_q_entry_release(mid_entry); } if (cifs_rdma_enabled(server)) { @@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) if (mid->mid_flags & MID_DELETED) printk_once(KERN_WARNING "trying to dequeue a deleted mid\n"); - else + else { list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } spin_unlock(&GlobalMid_Lock); } @@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid); + kref_get(&mid_entry->refcount);
mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); + mid_entry->mid_flags |= MID_DELETED; } spin_unlock(&GlobalMid_Lock); @@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); + cifs_mid_q_entry_release(mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ msleep(125); @@ -3882,8 +3890,12 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = socket->ops->connect(socket, saddr, slen, server->noblockcnt ? O_NONBLOCK : 0); - - if (rc == -EINPROGRESS) + /* + * When mounting SMB root file systems, we do not want to block in + * connect. Otherwise bail out and then let cifs_reconnect() perform + * reconnect failover - if possible. + */ + if (server->noblockcnt && rc == -EINPROGRESS) rc = 0; if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5ad15de2bb4f..fa7b0fa72bb3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -281,6 +281,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode) return has_locks; } +void +cifs_down_write(struct rw_semaphore *sem) +{ + while (!down_write_trylock(sem)) + msleep(10); +} + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -306,7 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add(&fdlocks->llist, &cinode->llist); up_write(&cinode->lock_sem); @@ -405,10 +412,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) bool oplock_break_cancelled; spin_lock(&tcon->open_file_lock); - + spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&cifsi->open_file_lock); spin_unlock(&tcon->open_file_lock); return; } @@ -421,9 +429,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); /* remove it from the lists */ - spin_lock(&cifsi->open_file_lock); list_del(&cifs_file->flist); - spin_unlock(&cifsi->open_file_lock); list_del(&cifs_file->tlist); atomic_dec(&tcon->num_local_opens); @@ -440,6 +446,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_set_oplock_level(cifsi, 0); } + spin_unlock(&cifsi->open_file_lock); spin_unlock(&tcon->open_file_lock); oplock_break_cancelled = wait_oplock_handler ? @@ -464,7 +471,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. 
*/ - down_write(&cifsi->lock_sem); + cifs_down_write(&cifsi->lock_sem); list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); @@ -1027,7 +1034,7 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); } @@ -1049,7 +1056,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, lock->flags, &conf_lock, @@ -1072,7 +1079,7 @@ try_again: (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_del_init(&lock->blist); } @@ -1125,7 +1132,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1331,7 +1338,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) int rc = 0; /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1522,7 +1529,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5dcc95b38310..df9377828e2f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2475,9 +2475,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); cifsFileInfo_put(wfile); if (rc) - return rc; + goto cifs_setattr_exit; } else if (rc != -EBADF) - return rc; + goto cifs_setattr_exit; else rc = 0; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b7421a096319..514810694c0f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -171,6 +171,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; + /* avoid 0xFFFF MID */ + if (cur_mid == 0xffff) + cur_mid++; /* * This nested loop looks more expensive than it is. 
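The down_write() to cifs_down_write() conversions in the cifs hunks above and below all address the hazard documented in the new cifsglob.h comment. A minimal sketch of the failure mode, assuming a writer-fair rw_semaphore; the interleaving is illustrative, while cifs_down_write() itself is the helper added by this patch:

#include <linux/rwsem.h>
#include <linux/delay.h>

/*
 * Why a plain down_write() can deadlock when some code path takes
 * down_read(lock_sem) twice:
 *
 *   task A: down_read(&sem);    first read hold succeeds
 *   task B: down_write(&sem);   queues as a writer and blocks new readers
 *   task A: down_read(&sem);    waits behind the queued writer
 *
 * A now waits on B, and B waits on A's first read hold: deadlock.
 * cifs_down_write() never queues as a waiting writer; it polls with
 * down_write_trylock(), so the second down_read() can always proceed.
 */
void cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

The trade-off is up to 10 ms of added latency per contended write acquisition, which these lock-management slow paths can tolerate.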
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index e6a1fc72018f..8b0b512c5792 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -145,7 +145,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur = buf; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4c0922596467..cd55af9b7cc5 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4084,6 +4084,7 @@ free_pages: kfree(dw->ppages); cifs_small_buf_release(dw->buf); + kfree(dw); } @@ -4157,7 +4158,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid, dw->server = server; dw->ppages = pages; dw->len = len; - queue_work(cifsiod_wq, &dw->decrypt); + queue_work(decrypt_wq, &dw->decrypt); *num_mids = 0; /* worker thread takes care of finding mid */ return -1; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 308ad0f495e1..ca3de62688d6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) static void _cifs_mid_q_entry_release(struct kref *refcount) { - struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, - refcount); - - mempool_free(mid, cifs_mid_poolp); -} - -void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) -{ - spin_lock(&GlobalMid_Lock); - kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); - spin_unlock(&GlobalMid_Lock); -} - -void -DeleteMidQEntry(struct mid_q_entry *midEntry) -{ + struct mid_q_entry *midEntry = + container_of(refcount, struct mid_q_entry, refcount); #ifdef CONFIG_CIFS_STATS2 __le16 command = midEntry->server->vals->lock_cmd; __u16 smb_cmd = le16_to_cpu(midEntry->command); @@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif + + mempool_free(midEntry, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + +void DeleteMidQEntry(struct mid_q_entry *midEntry) +{ + cifs_mid_q_entry_release(midEntry); } @@ -173,8 +172,10 @@ void cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); - list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + if (!(mid->mid_flags & MID_DELETED)) { + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(mid); @@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) rc = -EHOSTDOWN; break; default: - list_del_init(&mid->qhead); + if (!(mid->mid_flags & MID_DELETED)) { + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; + } cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c @@ -220,10 +220,11 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order) for (;;) { entry = xas_find_conflict(xas); + if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) + return entry; if (dax_entry_order(entry) < order) return XA_RETRY_ENTRY; - if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) || - !dax_is_locked(entry)) + if (!dax_is_locked(entry)) return entry; wq = dax_entry_waitqueue(xas, entry, &ewait.key); diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 6419a2b3510d..3e8cebfb59b7 100644 ---
a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o +obj-$(CONFIG_VIRTIO_FS) += virtiofs.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o +virtiofs-y += virtio_fs.o diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dadd617d826c..ed1abc9e33cf 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,10 +276,12 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async = req->args->end; + bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; + + async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d572c900bb0f..54d638f9ba1c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -405,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, else fuse_invalidate_entry_cache(entry); - fuse_advise_use_readdirplus(dir); + if (inode) + fuse_advise_use_readdirplus(dir); return newent; out_iput: @@ -1521,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, is_truncate = true; } + /* Flush dirty data/metadata before non-truncate SETATTR */ + if (is_wb && S_ISREG(inode->i_mode) && + attr->ia_valid & + (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | + ATTR_TIMES_SET)) { + err = write_inode_now(inode, true); + if (err) + return err; + + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); + } + if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0f0225686aee..db48a5cf8620 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -217,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; - bool lock_inode = (file->f_flags & O_TRUNC) && + bool is_wb_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && fc->writeback_cache; @@ -225,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (lock_inode) + if (is_wb_truncate) { inode_lock(inode); + fuse_set_nowrite(inode); + } err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); - if (lock_inode) + if (is_wb_truncate) { + fuse_release_nowrite(inode); inode_unlock(inode); + } return err; } @@ -1997,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page, if (!data->ff) { err = -EIO; - data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); + data->ff = fuse_write_file_get(fc, fi); if (!data->ff) goto out_unlock; } @@ -2042,8 +2046,6 @@ static int fuse_writepages_fill(struct page *page, * under writeback, so we can release the page lock. 
*/ if (data->wpa == NULL) { - struct fuse_inode *fi = get_fuse_inode(inode); - err = -ENOMEM; wpa = fuse_writepage_args_alloc(); if (!wpa) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 956aeaf961ae..d148188cfca4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -479,6 +479,7 @@ struct fuse_fs_context { bool destroy:1; bool no_control:1; bool no_force_umount:1; + bool no_mount_options:1; unsigned int max_read; unsigned int blksize; const char *subtype; @@ -713,6 +714,9 @@ struct fuse_conn { /** Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; + /* Do not show mount options */ + unsigned int no_mount_options:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e040e2a2b621..16aec32f7f3d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -558,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->no_mount_options) + return 0; + seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); if (fc->default_permissions) @@ -1180,6 +1183,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; + fc->no_mount_options = ctx->no_mount_options; err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6af3f131e468..a5c86048b96e 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -30,6 +30,7 @@ struct virtio_fs_vq { struct virtqueue *vq; /* protected by ->lock */ struct work_struct done_work; struct list_head queued_reqs; + struct list_head end_reqs; /* End these requests */ struct delayed_work dispatch_work; struct fuse_dev *fud; bool connected; @@ -54,6 +55,9 @@ struct virtio_fs_forget { struct list_head list; }; +static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req, bool in_flight); + static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; @@ -66,6 +70,19 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) return &vq_to_fsvq(vq)->fud->pq; } +/* Should be called with fsvq->lock held. */ +static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) +{ + fsvq->in_flight++; +} + +/* Should be called with fsvq->lock held. 
*/ +static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight <= 0); + fsvq->in_flight--; +} + static void release_virtio_fs_obj(struct kref *ref) { struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); @@ -109,22 +126,6 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) flush_delayed_work(&fsvq->dispatch_work); } -static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) -{ - struct virtio_fs_forget *forget; - - spin_lock(&fsvq->lock); - while (1) { - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) - break; - list_del(&forget->list); - kfree(forget); - } - spin_unlock(&fsvq->lock); -} - static void virtio_fs_drain_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; @@ -132,9 +133,6 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs) for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; - if (i == VQ_HIPRIO) - drain_hiprio_queued_reqs(fsvq); - virtio_fs_drain_queue(fsvq); } } @@ -253,14 +251,66 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work) while ((req = virtqueue_get_buf(vq, &len)) != NULL) { kfree(req); - fsvq->in_flight--; + dec_in_flight_req(fsvq); } } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); spin_unlock(&fsvq->lock); } -static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +static void virtio_fs_request_dispatch_work(struct work_struct *work) { + struct fuse_req *req; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + struct fuse_conn *fc = fsvq->fud->fc; + int ret; + + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, + list); + if (!req) { + spin_unlock(&fsvq->lock); + break; + } + + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + fuse_request_end(fc, req); + } + + /* Dispatch pending requests */ + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->queued_reqs, + struct fuse_req, list); + if (!req) { + spin_unlock(&fsvq->lock); + return; + } + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + + ret = virtio_fs_enqueue_req(fsvq, req, true); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; + } + req->out.h.error = ret; + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", + ret); + fuse_request_end(fc, req); + } + } } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) @@ -286,6 +336,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) list_del(&forget->list); if (!fsvq->connected) { + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); kfree(forget); continue; @@ -307,13 +358,13 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", ret); + dec_in_flight_req(fsvq); kfree(forget); } spin_unlock(&fsvq->lock); return; } - fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -452,7 +503,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work) fuse_request_end(fc, req); spin_lock(&fsvq->lock); - fsvq->in_flight--; + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); } } @@ -502,6 +553,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, virtio_fs_hiprio_dispatch_work); spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); @@ -511,8 +563,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, spin_lock_init(&fs->vqs[i].lock); INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, - virtio_fs_dummy_dispatch_work); + virtio_fs_request_dispatch_work); INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[i].end_reqs); snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), "requests.%u", i - VQ_REQUEST); callbacks[i] = virtio_fs_vq_done; @@ -708,6 +761,7 @@ __releases(fiq->lock) list_add_tail(&forget->list, &fsvq->queued_reqs); schedule_delayed_work(&fsvq->dispatch_work, msecs_to_jiffies(1)); + inc_in_flight_req(fsvq); } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", ret); @@ -717,7 +771,7 @@ __releases(fiq->lock) goto out; } - fsvq->in_flight++; + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -819,7 +873,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg, /* Add a request to a virtqueue and kick the device */ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, - struct fuse_req *req) + struct fuse_req *req, bool in_flight) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; @@ -835,6 +889,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, unsigned int i; int ret; bool notify; + struct fuse_pqueue *fpq; /* Does the sglist fit on the stack? */ total_sgs = sg_count_fuse_req(req); @@ -889,7 +944,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, goto out; } - fsvq->in_flight++; + /* Request successfully sent. 
*/ + fpq = &fsvq->fud->pq; + spin_lock(&fpq->lock); + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + + if (!in_flight) + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -915,9 +980,8 @@ __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ struct virtio_fs *fs; - struct fuse_conn *fc; struct fuse_req *req; - struct fuse_pqueue *fpq; + struct virtio_fs_vq *fsvq; int ret; WARN_ON(list_empty(&fiq->pending)); @@ -928,44 +992,36 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); fs = fiq->priv; - fc = fs->vqs[queue_id].fud->fc; pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", __func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args)); - fpq = &fs->vqs[queue_id].fud->pq; - spin_lock(&fpq->lock); - if (!fpq->connected) { - spin_unlock(&fpq->lock); - req->out.h.error = -ENODEV; - pr_err("virtio-fs: %s disconnected\n", __func__); - fuse_request_end(fc, req); - return; - } - list_add_tail(&req->list, fpq->processing); - spin_unlock(&fpq->lock); - set_bit(FR_SENT, &req->flags); - /* matches barrier in request_wait_answer() */ - smp_mb__after_atomic(); - -retry: - ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + fsvq = &fs->vqs[queue_id]; + ret = virtio_fs_enqueue_req(fsvq, req, false); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { - /* Virtqueue full. Retry submission */ - /* TODO use completion instead of timeout */ - usleep_range(20, 30); - goto retry; + /* + * Virtqueue full. Retry submission from worker + * context as we might be holding fc->bg_lock. + */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + inc_in_flight_req(fsvq); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); - list_del_init(&req->list); - spin_unlock(&fpq->lock); - fuse_request_end(fc, req); + + /* Can't end request in submission context. 
Use a worker */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->end_reqs); + schedule_delayed_work(&fsvq->dispatch_work, 0); + spin_unlock(&fsvq->lock); return; } } @@ -992,6 +1048,7 @@ static int virtio_fs_fill_super(struct super_block *sb) .destroy = true, .no_control = true, .no_force_umount = true, + .no_mount_options = true, }; mutex_lock(&virtio_fs_mutex); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 681b44682b0d..18daf494abab 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1540,17 +1540,23 @@ static int gfs2_init_fs_context(struct fs_context *fc) { struct gfs2_args *args; - args = kzalloc(sizeof(*args), GFP_KERNEL); + args = kmalloc(sizeof(*args), GFP_KERNEL); if (args == NULL) return -ENOMEM; - args->ar_quota = GFS2_QUOTA_DEFAULT; - args->ar_data = GFS2_DATA_DEFAULT; - args->ar_commit = 30; - args->ar_statfs_quantum = 30; - args->ar_quota_quantum = 60; - args->ar_errors = GFS2_ERRORS_DEFAULT; + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info; + *args = sdp->sd_args; + } else { + memset(args, 0, sizeof(*args)); + args->ar_quota = GFS2_QUOTA_DEFAULT; + args->ar_data = GFS2_DATA_DEFAULT; + args->ar_commit = 30; + args->ar_statfs_quantum = 30; + args->ar_quota_quantum = 60; + args->ar_errors = GFS2_ERRORS_DEFAULT; + } fc->fs_private = args; fc->ops = &gfs2_context_ops; return 0; @@ -1600,6 +1606,7 @@ static int gfs2_meta_get_tree(struct fs_context *fc) } static const struct fs_context_operations gfs2_meta_context_ops = { + .free = gfs2_fc_free, .get_tree = gfs2_meta_get_tree, }; diff --git a/fs/io_uring.c b/fs/io_uring.c index 67dbe0201e0d..f9a38998f2fc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -197,6 +197,7 @@ struct io_ring_ctx { unsigned sq_entries; unsigned sq_mask; unsigned sq_thread_idle; + unsigned cached_sq_dropped; struct io_uring_sqe *sq_sqes; struct list_head defer_list; @@ -212,6 +213,7 @@ struct io_ring_ctx { struct { unsigned cached_cq_tail; + atomic_t cached_cq_overflow; unsigned cq_entries; unsigned cq_mask; struct wait_queue_head cq_wait; @@ -420,7 +422,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, struct io_kiocb *req) { - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; + return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped + + atomic_read(&ctx->cached_cq_overflow); } static inline bool io_sequence_defer(struct io_ring_ctx *ctx, @@ -567,9 +570,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, WRITE_ONCE(cqe->res, res); WRITE_ONCE(cqe->flags, 0); } else { - unsigned overflow = READ_ONCE(ctx->rings->cq_overflow); - - WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1); + WRITE_ONCE(ctx->rings->cq_overflow, + atomic_inc_return(&ctx->cached_cq_overflow)); } } @@ -735,6 +737,14 @@ static unsigned io_cqring_events(struct io_rings *rings) return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); } +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + + /* make sure SQ entry isn't read before tail */ + return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; +} + /* * Find and free completed poll iocbs */ @@ -864,19 +874,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int __io_iopoll_check(struct 
io_ring_ctx *ctx, unsigned *nr_events, + long min) { - int iters, ret = 0; + int iters = 0, ret = 0; - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); - - iters = 0; do { int tmin = 0; @@ -912,6 +914,21 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); + return ret; +} + +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) +{ + int ret; + + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); + ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -1107,6 +1124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; + req->result = 0; } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; @@ -1877,7 +1895,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) { struct io_ring_ctx *ctx; - struct io_kiocb *req; + struct io_kiocb *req, *prev; unsigned long flags; req = container_of(timer, struct io_kiocb, timeout.timer); @@ -1885,6 +1903,15 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) atomic_inc(&ctx->cq_timeouts); spin_lock_irqsave(&ctx->completion_lock, flags); + /* + * Adjust the reqs sequence before the current one because it + * will consume a slot in the cq_ring and the cq_tail pointer + * will be increased, otherwise other timeout reqs may return in + * advance without waiting for enough wait_nr. + */ + prev = req; + list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list) + prev->sequence++; list_del(&req->list); io_cqring_fill_event(ctx, req->user_data, -ETIME); @@ -1903,6 +1930,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_ring_ctx *ctx = req->ctx; struct list_head *entry; struct timespec64 ts; + unsigned span = 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -1951,9 +1979,17 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ctx->cached_sq_head < nxt_sq_head) tmp += UINT_MAX; - if (tmp >= tmp_nxt) + if (tmp > tmp_nxt) break; + + /* + * Sequence of reqs after the insert one and itself should + * be adjusted because each timeout req consumes a slot.
+ */ + span++; + nxt->sequence++; } + req->sequence -= span; list_add(&req->list, entry); spin_unlock_irq(&ctx->completion_lock); @@ -2292,11 +2328,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, } static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, bool force_nonblock) + struct sqe_submit *s) { int ret; - ret = __io_submit_sqe(ctx, req, s, force_nonblock); + ret = __io_submit_sqe(ctx, req, s, true); /* * We async punt it if the file wasn't marked NOWAIT, or if the file @@ -2343,7 +2379,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, } static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, bool force_nonblock) + struct sqe_submit *s) { int ret; @@ -2356,18 +2392,17 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, return 0; } - return __io_queue_sqe(ctx, req, s, force_nonblock); + return __io_queue_sqe(ctx, req, s); } static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, - struct sqe_submit *s, struct io_kiocb *shadow, - bool force_nonblock) + struct sqe_submit *s, struct io_kiocb *shadow) { int ret; int need_submit = false; if (!shadow) - return io_queue_sqe(ctx, req, s, force_nonblock); + return io_queue_sqe(ctx, req, s); /* * Mark the first IO in link list as DRAIN, let all the following @@ -2379,6 +2414,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, if (ret) { if (ret != -EIOCBQUEUED) { io_free_req(req); + __io_free_req(shadow); io_cqring_add_event(ctx, s->sqe->user_data, ret); return 0; } @@ -2396,7 +2432,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, spin_unlock_irq(&ctx->completion_lock); if (need_submit) - return __io_queue_sqe(ctx, req, s, force_nonblock); + return __io_queue_sqe(ctx, req, s); return 0; } @@ -2404,8 +2440,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, - struct io_submit_state *state, struct io_kiocb **link, - bool force_nonblock) + struct io_submit_state *state, struct io_kiocb **link) { struct io_uring_sqe *sqe_copy; struct io_kiocb *req; @@ -2432,6 +2467,8 @@ err: return; } + req->user_data = s->sqe->user_data; + /* * If we already have a head request, queue this one for async * submittal once the head completes. 
If we don't have a head but @@ -2458,7 +2495,7 @@ err: INIT_LIST_HEAD(&req->link_list); *link = req; } else { - io_queue_sqe(ctx, req, s, force_nonblock); + io_queue_sqe(ctx, req, s); } } @@ -2538,12 +2575,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) /* drop invalid entries */ ctx->cached_sq_head++; - rings->sq_dropped++; + ctx->cached_sq_dropped++; + WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped); return false; } -static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, - unsigned int nr, bool has_user, bool mm_fault) +static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, + bool has_user, bool mm_fault) { struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; @@ -2557,19 +2595,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, } for (i = 0; i < nr; i++) { + struct sqe_submit s; + + if (!io_get_sqring(ctx, &s)) + break; + /* * If previous wasn't linked and we have a linked command, * that's the end of the chain. Submit the previous link. */ if (!prev_was_link && link) { - io_queue_link_head(ctx, link, &link->submit, shadow_req, - true); + io_queue_link_head(ctx, link, &link->submit, shadow_req); link = NULL; shadow_req = NULL; } - prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; - if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) { + if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) { if (!shadow_req) { shadow_req = io_get_req(ctx, NULL); if (unlikely(!shadow_req)) @@ -2577,24 +2619,24 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN); refcount_dec(&shadow_req->refs); } - shadow_req->sequence = sqes[i].sequence; + shadow_req->sequence = s.sequence; } out: if (unlikely(mm_fault)) { - io_cqring_add_event(ctx, sqes[i].sqe->user_data, + io_cqring_add_event(ctx, s.sqe->user_data, -EFAULT); } else { - sqes[i].has_user = has_user; - sqes[i].needs_lock = true; - sqes[i].needs_fixed_file = true; - io_submit_sqe(ctx, &sqes[i], statep, &link, true); + s.has_user = has_user; + s.needs_lock = true; + s.needs_fixed_file = true; + io_submit_sqe(ctx, &s, statep, &link); submitted++; } } if (link) - io_queue_link_head(ctx, link, &link->submit, shadow_req, true); + io_queue_link_head(ctx, link, &link->submit, shadow_req); if (statep) io_submit_state_end(&state); @@ -2603,7 +2645,6 @@ out: static int io_sq_thread(void *data) { - struct sqe_submit sqes[IO_IOPOLL_BATCH]; struct io_ring_ctx *ctx = data; struct mm_struct *cur_mm = NULL; mm_segment_t old_fs; @@ -2618,14 +2659,27 @@ static int io_sq_thread(void *data) timeout = inflight = 0; while (!kthread_should_park()) { - bool all_fixed, mm_fault = false; - int i; + bool mm_fault = false; + unsigned int to_submit; if (inflight) { unsigned nr_events = 0; if (ctx->flags & IORING_SETUP_IOPOLL) { - io_iopoll_check(ctx, &nr_events, 0); + /* + * inflight is the count of the maximum possible + * entries we submitted, but it can be smaller + * if we dropped some of them. If we don't have + * poll entries available, then we know that we + * have nothing left to poll for. Reset the + * inflight count to zero in that case. + */ + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->poll_list)) + __io_iopoll_check(ctx, &nr_events, 0); + else + inflight = 0; + mutex_unlock(&ctx->uring_lock); } else { /* * Normal IO, just pretend everything completed. 
@@ -2639,7 +2693,8 @@ static int io_sq_thread(void *data) timeout = jiffies + ctx->sq_thread_idle; } - if (!io_get_sqring(ctx, &sqes[0])) { + to_submit = io_sqring_entries(ctx); + if (!to_submit) { /* * We're polling. If we're within the defined idle * period, then let us spin without work before going @@ -2670,7 +2725,8 @@ static int io_sq_thread(void *data) /* make sure to read SQ tail after writing flags */ smp_mb(); - if (!io_get_sqring(ctx, &sqes[0])) { + to_submit = io_sqring_entries(ctx); + if (!to_submit) { if (kthread_should_park()) { finish_wait(&ctx->sqo_wait, &wait); break; @@ -2688,19 +2744,8 @@ static int io_sq_thread(void *data) ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; } - i = 0; - all_fixed = true; - do { - if (all_fixed && io_sqe_needs_user(sqes[i].sqe)) - all_fixed = false; - - i++; - if (i == ARRAY_SIZE(sqes)) - break; - } while (io_get_sqring(ctx, &sqes[i])); - /* Unless all new commands are FIXED regions, grab mm */ - if (!all_fixed && !cur_mm) { + if (!cur_mm) { mm_fault = !mmget_not_zero(ctx->sqo_mm); if (!mm_fault) { use_mm(ctx->sqo_mm); @@ -2708,8 +2753,9 @@ static int io_sq_thread(void *data) } } - inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL, - mm_fault); + to_submit = min(to_submit, ctx->sq_entries); + inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL, + mm_fault); /* Commit SQ ring head once we've consumed all SQEs */ io_commit_sqring(ctx); @@ -2726,8 +2772,7 @@ static int io_sq_thread(void *data) return 0; } -static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, - bool block_for_last) +static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; @@ -2741,7 +2786,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, } for (i = 0; i < to_submit; i++) { - bool force_nonblock = true; struct sqe_submit s; if (!io_get_sqring(ctx, &s)) @@ -2752,8 +2796,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, * that's the end of the chain. Submit the previous link. */ if (!prev_was_link && link) { - io_queue_link_head(ctx, link, &link->submit, shadow_req, - force_nonblock); + io_queue_link_head(ctx, link, &link->submit, shadow_req); link = NULL; shadow_req = NULL; } @@ -2775,27 +2818,16 @@ out: s.needs_lock = false; s.needs_fixed_file = false; submit++; - - /* - * The caller will block for events after submit, submit the - * last IO non-blocking. This is either the only IO it's - * submitting, or it already submitted the previous ones. This - * improves performance by avoiding an async punt that we don't - * need to do. - */ - if (block_for_last && submit == to_submit) - force_nonblock = false; - - io_submit_sqe(ctx, &s, statep, &link, force_nonblock); + io_submit_sqe(ctx, &s, statep, &link); } - io_commit_sqring(ctx); if (link) - io_queue_link_head(ctx, link, &link->submit, shadow_req, - !block_for_last); + io_queue_link_head(ctx, link, &link->submit, shadow_req); if (statep) io_submit_state_end(statep); + io_commit_sqring(ctx); + return submit; } @@ -3636,21 +3668,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, wake_up(&ctx->sqo_wait); submitted = to_submit; } else if (to_submit) { - bool block_for_last = false; - to_submit = min(to_submit, ctx->sq_entries); - /* - * Allow last submission to block in a series, IFF the caller - * asked to wait for events and we don't currently have - * enough. This potentially avoids an async punt. 
- */ - if (to_submit == min_complete && - io_cqring_events(ctx->rings) < min_complete) - block_for_last = true; - mutex_lock(&ctx->uring_lock); - submitted = io_ring_submit(ctx, to_submit, block_for_last); + submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } if (flags & IORING_ENTER_GETEVENTS) { @@ -3809,10 +3830,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret) goto err; - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; - memset(&p->sq_off, 0, sizeof(p->sq_off)); p->sq_off.head = offsetof(struct io_rings, sq.head); p->sq_off.tail = offsetof(struct io_rings, sq.tail); @@ -3830,6 +3847,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + /* + * Install ring fd as the very last thing, so we don't risk someone + * having closed it before we finish setup + */ + ret = io_uring_get_fd(ctx); + if (ret < 0) + goto err; + p->features = IORING_FEAT_SINGLE_MMAP; return ret; err: diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7d46fafdbbe5..0afb6d59bad0 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -464,7 +464,8 @@ nlm_bind_host(struct nlm_host *host) .version = host->h_version, .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_NOPING | - RPC_CLNT_CREATE_AUTOBIND), + RPC_CLNT_CREATE_AUTOBIND | + RPC_CLNT_CREATE_REUSEPORT), .cred = host->h_cred, }; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 8f34daf85f70..549350259840 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -72,8 +72,8 @@ struct cb_getattrres { uint32_t bitmap[2]; uint64_t size; uint64_t change_attr; - struct timespec ctime; - struct timespec mtime; + struct timespec64 ctime; + struct timespec64 mtime; }; struct cb_recallargs { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f39924ba050b..cd4c6bc81cae 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -26,7 +26,6 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, struct cb_getattrargs *args = argp; struct cb_getattrres *res = resp; struct nfs_delegation *delegation; - struct nfs_inode *nfsi; struct inode *inode; res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION); @@ -47,17 +46,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, -ntohl(res->status)); goto out; } - nfsi = NFS_I(inode); rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); + delegation = nfs4_get_valid_delegation(inode); if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; - res->ctime = timespec64_to_timespec(inode->i_ctime); - res->mtime = timespec64_to_timespec(inode->i_mtime); + res->ctime = inode->i_ctime; + res->mtime = inode->i_mtime; res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & args->bitmap[0]; res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 73a5a5ea2976..03a20f5716c7 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -627,7 +627,7 @@ static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, u return 0; } -static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) +static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *time) { __be32 *p; @@ -639,14 +639,14 @@ static __be32 
encode_attr_time(struct xdr_stream *xdr, const struct timespec *ti return 0; } -static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) { if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) return 0; return encode_attr_time(xdr,time); } -static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) { if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) return 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 30838304a0bf..02110a30a49e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -312,6 +312,12 @@ again: /* Match nfsv4 minorversion */ if (clp->cl_minorversion != data->minorversion) continue; + + /* Match request for a dedicated DS */ + if (test_bit(NFS_CS_DS, &data->init_flags) != + test_bit(NFS_CS_DS, &clp->cl_flags)) + continue; + /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) /* Match all xprt_switch full socket addresses */ @@ -515,6 +521,10 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; + if (test_bit(NFS_CS_NOPING, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NOPING; + if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_REUSEPORT; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -662,6 +672,7 @@ static int nfs_init_server(struct nfs_server *server, .timeparms = &timeparms, .cred = server->cred, .nconnect = data->nfs_server.nconnect, + .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 071b90a45933..fe57b2b5314a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -53,6 +53,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation, return false; } +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (nfs4_is_valid_delegation(delegation, 0)) + return delegation; + return NULL; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { @@ -189,7 +199,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL) { spin_lock(&delegation->lock); - if (delegation->inode != NULL) { + if (nfs4_is_valid_delegation(delegation, 0)) { nfs4_stateid_copy(&delegation->stateid, stateid); delegation->type = type; delegation->pagemod_limit = pagemod_limit; @@ -219,7 +229,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * delegation->cred, &delegation->stateid, issync); - nfs_free_delegation(delegation); return res; } @@ -288,7 +297,10 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, return NULL; spin_lock(&delegation->lock); - set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); + if (!delegation->inode) { + spin_unlock(&delegation->lock); + return NULL; + } list_del_rcu(&delegation->super_list); delegation->inode = NULL; rcu_assign_pointer(nfsi->delegation, NULL); @@ -315,10 +327,12 @@ nfs_inode_detach_delegation(struct inode *inode) struct nfs_server *server = 
NFS_SERVER(inode); struct nfs_delegation *delegation; - delegation = nfs_start_delegation_return(nfsi); - if (delegation == NULL) - return NULL; - return nfs_detach_delegation(nfsi, delegation, server); + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation != NULL) + delegation = nfs_detach_delegation(nfsi, delegation, server); + rcu_read_unlock(); + return delegation; } static void @@ -329,6 +343,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, delegation->stateid.seqid = update->stateid.seqid; smp_wmb(); delegation->type = update->type; + clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); } } @@ -369,14 +384,18 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, spin_lock(&clp->cl_lock); old_delegation = rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); - if (old_delegation != NULL) { - /* Is this an update of the existing delegation? */ - if (nfs4_stateid_match_other(&old_delegation->stateid, - &delegation->stateid)) { - nfs_update_inplace_delegation(old_delegation, - delegation); - goto out; - } + if (old_delegation == NULL) + goto add_new; + /* Is this an update of the existing delegation? */ + if (nfs4_stateid_match_other(&old_delegation->stateid, + &delegation->stateid)) { + spin_lock(&old_delegation->lock); + nfs_update_inplace_delegation(old_delegation, + delegation); + spin_unlock(&old_delegation->lock); + goto out; + } + if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) { /* * Deal with broken servers that hand out two * delegations for the same file. @@ -395,11 +414,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, if (test_and_set_bit(NFS_DELEGATION_RETURNING, &old_delegation->flags)) goto out; - freeme = nfs_detach_delegation_locked(nfsi, - old_delegation, clp); - if (freeme == NULL) - goto out; } + freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp); + if (freeme == NULL) + goto out; +add_new: list_add_tail_rcu(&delegation->super_list, &server->delegations); rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; @@ -414,8 +433,10 @@ out: spin_unlock(&clp->cl_lock); if (delegation != NULL) nfs_free_delegation(delegation); - if (freeme != NULL) + if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); + nfs_free_delegation(freeme); + } return status; } @@ -425,7 +446,6 @@ out: static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); int err = 0; if (delegation == NULL) @@ -447,8 +467,6 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation nfs_abort_delegation_return(delegation, clp); goto out; } - if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode))) - goto out; err = nfs_do_return_delegation(inode, delegation, issync); out: @@ -459,8 +477,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) { bool ret = false; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) - goto out; if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { @@ -472,7 +488,10 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } -out: + if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || + 
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + ret = false; + return ret; } @@ -575,19 +594,23 @@ restart: } /** - * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens + * nfs_inode_evict_delegation - return delegation, don't reclaim opens * @inode: inode to process * * Does not protect against delegation reclaims, therefore really only safe - * to be called from nfs4_clear_inode(). + * to be called from nfs4_clear_inode(). Guaranteed to always free + * the delegation structure. */ -void nfs_inode_return_delegation_noreclaim(struct inode *inode) +void nfs_inode_evict_delegation(struct inode *inode) { struct nfs_delegation *delegation; delegation = nfs_inode_detach_delegation(inode); - if (delegation != NULL) + if (delegation != NULL) { + set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); + nfs_free_delegation(delegation); + } } /** @@ -623,10 +646,18 @@ int nfs4_inode_return_delegation(struct inode *inode) */ int nfs4_inode_make_writeable(struct inode *inode) { - if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) || - !nfs4_check_delegation(inode, FMODE_WRITE)) - return nfs4_inode_return_delegation(inode); - return 0; + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (delegation == NULL || + (nfs4_has_session(NFS_SERVER(inode)->nfs_client) && + (delegation->type & FMODE_WRITE))) { + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + return nfs4_inode_return_delegation(inode); } static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, @@ -734,10 +765,9 @@ static void nfs_mark_delegation_revoked(struct nfs_server *server, { set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; - nfs_mark_return_delegation(server, delegation); } -static bool nfs_revoke_delegation(struct inode *inode, +static void nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_delegation *delegation; @@ -751,29 +781,69 @@ static bool nfs_revoke_delegation(struct inode *inode, if (stateid == NULL) { nfs4_stateid_copy(&tmp, &delegation->stateid); stateid = &tmp; - } else if (!nfs4_stateid_match(stateid, &delegation->stateid)) - goto out; + } else { + if (!nfs4_stateid_match_other(stateid, &delegation->stateid)) + goto out; + spin_lock(&delegation->lock); + if (stateid->seqid) { + if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) { + spin_unlock(&delegation->lock); + goto out; + } + delegation->stateid.seqid = stateid->seqid; + } + spin_unlock(&delegation->lock); + } nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); ret = true; out: rcu_read_unlock(); if (ret) nfs_inode_find_state_and_recover(inode, stateid); - return ret; } void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid) { + nfs_revoke_delegation(inode, stateid); +} +EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); + +void nfs_delegation_mark_returned(struct inode *inode, + const nfs4_stateid *stateid) +{ struct nfs_delegation *delegation; - if (!nfs_revoke_delegation(inode, stateid)) + if (!inode) return; - delegation = nfs_inode_detach_delegation(inode); - if (delegation) - nfs_free_delegation(delegation); + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (!delegation) + goto out_rcu_unlock; + + spin_lock(&delegation->lock); + if (!nfs4_stateid_match_other(stateid, &delegation->stateid)) + goto out_spin_unlock; + if 
(stateid->seqid) { + /* If delegation->stateid is newer, don't mark as returned */ + if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) + goto out_clear_returning; + if (delegation->stateid.seqid != stateid->seqid) + delegation->stateid.seqid = stateid->seqid; + } + + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + +out_clear_returning: + clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); +out_spin_unlock: + spin_unlock(&delegation->lock); +out_rcu_unlock: + rcu_read_unlock(); + + nfs_inode_find_state_and_recover(inode, stateid); } -EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** * nfs_expire_unused_delegation_types @@ -830,7 +900,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); + delegation = nfs4_get_valid_delegation(inode); if (delegation == NULL) goto out_enoent; if (stateid != NULL && @@ -856,6 +926,7 @@ nfs_delegation_find_inode_server(struct nfs_server *server, list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { freeme = igrab(delegation->inode); if (freeme && nfs_sb_active(freeme->i_sb)) @@ -1130,7 +1201,8 @@ void nfs_inode_find_delegation_state_and_recover(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation && - nfs4_stateid_match_other(&delegation->stateid, stateid)) { + nfs4_stateid_match_or_older(&delegation->stateid, stateid) && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation); found = true; } @@ -1179,9 +1251,11 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode) rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL && - nfs4_stateid_match_other(dst, &delegation->stateid)) { + nfs4_stateid_match_other(dst, &delegation->stateid) && + nfs4_stateid_is_newer(&delegation->stateid, dst) && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { dst->seqid = delegation->stateid.seqid; - return ret; + ret = true; } rcu_read_unlock(); out: diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 9eb87ae4c982..15d3484be028 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -43,7 +43,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); -void nfs_inode_return_delegation_noreclaim(struct inode *inode); +void nfs_inode_evict_delegation(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_server_return_all_delegations(struct nfs_server *); @@ -53,6 +53,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid); +void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct
nfs_client *clp); @@ -68,6 +69,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index deecb67638aa..3430d6891e89 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -105,6 +105,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); if (ret) { dprintk("%s: getattr failed %d\n", __func__, ret); + trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret); dentry = ERR_PTR(ret); goto out_free_label; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 95dc90570786..8eb731d9be3e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -649,7 +649,7 @@ out: out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); - return -EBUSY; + return -ETXTBSY; } EXPORT_SYMBOL_GPL(nfs_file_write); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2a03bfeec10a..b0b4b9f303fd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -504,15 +504,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (nfs_server_capable(inode, NFS_CAP_ATIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -698,7 +698,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -709,14 +709,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (attr->ia_valid & ATTR_ATIME_SET) inode->i_atime = attr->ia_atime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -725,14 +725,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= 
~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; else if (attr->ia_valid & ATTR_MTIME_SET) inode->i_mtime = attr->ia_mtime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -1351,7 +1351,7 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct timespec ts; + struct timespec64 ts; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -1361,18 +1361,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } /* If we have atomic WCC data, we may update some attributes */ - ts = timespec64_to_timespec(inode->i_ctime); + ts = inode->i_ctime; if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&ts, &fattr->pre_ctime)) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + && timespec64_equal(&ts, &fattr->pre_ctime)) { + inode->i_ctime = fattr->ctime; } - ts = timespec64_to_timespec(inode->i_mtime); + ts = inode->i_mtime; if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) - && timespec_equal(&ts, &fattr->pre_mtime)) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + && timespec64_equal(&ts, &fattr->pre_mtime)) { + inode->i_mtime = fattr->mtime; if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } @@ -1398,7 +1398,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; unsigned long invalid = 0; - struct timespec ts; + struct timespec64 ts; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; @@ -1425,12 +1425,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_CHANGE | NFS_INO_REVAL_PAGECACHE; - ts = timespec64_to_timespec(inode->i_mtime); - if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime)) + ts = inode->i_mtime; + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - ts = timespec64_to_timespec(inode->i_ctime); - if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime)) + ts = inode->i_ctime; + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1460,8 +1460,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_OTHER; - ts = timespec64_to_timespec(inode->i_atime); - if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime)) + ts = inode->i_atime; + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) @@ -1733,12 +1733,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && 
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime); + fattr->pre_ctime = inode->i_ctime; fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime); + fattr->pre_mtime = inode->i_mtime; fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -1899,7 +1899,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_MTIME @@ -1908,7 +1908,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_CTIME @@ -1946,7 +1946,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 447a3c17fa8e..24a65da58aa9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -713,7 +713,7 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) * 1024*1024*1024. */ static inline -u64 nfs_timespec_to_change_attr(const struct timespec *ts) +u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 9287eb666322..5e0e9d29f5c5 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -157,6 +157,9 @@ struct vfsmount *nfs_d_automount(struct path *path) if (IS_ERR(mnt)) goto out; + if (nfs_mountpoint_expiry_timeout < 0) + goto out; + mntget(mnt); /* prevent immediate expiration */ mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cbc17a203248..d94c7abdf25a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -209,9 +209,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh) * unsigned int useconds; * }; */ -static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep) +static __be32 *xdr_encode_time(__be32 *p, const struct timespec64 *timep) { - *p++ = cpu_to_be32(timep->tv_sec); + *p++ = cpu_to_be32((u32)timep->tv_sec); if (timep->tv_nsec != 0) *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC); else @@ -227,14 +227,14 @@ static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep) * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5. 
*/ static __be32 *xdr_encode_current_server_time(__be32 *p, - const struct timespec *timep) + const struct timespec64 *timep) { *p++ = cpu_to_be32(timep->tv_sec); *p++ = cpu_to_be32(1000000); return p; } -static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep) +static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep) { timep->tv_sec = be32_to_cpup(p++); timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC; @@ -339,7 +339,6 @@ static __be32 *xdr_time_not_set(__be32 *p) static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, struct user_namespace *userns) { - struct timespec ts; __be32 *p; p = xdr_reserve_space(xdr, NFS_sattr_sz << 2); @@ -362,19 +361,15 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); if (attr->ia_valid & ATTR_ATIME_SET) { - ts = timespec64_to_timespec(attr->ia_atime); - p = xdr_encode_time(p, &ts); + p = xdr_encode_time(p, &attr->ia_atime); } else if (attr->ia_valid & ATTR_ATIME) { - ts = timespec64_to_timespec(attr->ia_atime); - p = xdr_encode_current_server_time(p, &ts); + p = xdr_encode_current_server_time(p, &attr->ia_atime); } else p = xdr_time_not_set(p); if (attr->ia_valid & ATTR_MTIME_SET) { - ts = timespec64_to_timespec(attr->ia_atime); - xdr_encode_time(p, &ts); + xdr_encode_time(p, &attr->ia_mtime); } else if (attr->ia_valid & ATTR_MTIME) { - ts = timespec64_to_timespec(attr->ia_mtime); - xdr_encode_current_server_time(p, &ts); + xdr_encode_current_server_time(p, &attr->ia_mtime); } else xdr_time_not_set(p); } diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 148ceb74d27c..223904bc40a7 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -106,7 +106,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, cl_init.nconnect = mds_clp->cl_nconnect; if (mds_srv->flags & NFS_MOUNT_NORESVPORT) - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + + __set_bit(NFS_CS_NOPING, &cl_init.init_flags); + __set_bit(NFS_CS_DS, &cl_init.init_flags); /* Use the MDS nfs_client cl_ipaddr. 
*/ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 602767850b36..927eb680f161 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -456,14 +456,14 @@ static void zero_nfs_fh3(struct nfs_fh *fh) * uint32 nseconds; * }; */ -static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep) +static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep) { - *p++ = cpu_to_be32(timep->tv_sec); + *p++ = cpu_to_be32((u32)timep->tv_sec); *p++ = cpu_to_be32(timep->tv_nsec); return p; } -static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) +static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep) { timep->tv_sec = be32_to_cpup(p++); timep->tv_nsec = be32_to_cpup(p++); @@ -533,7 +533,6 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, struct user_namespace *userns) { - struct timespec ts; u32 nbytes; __be32 *p; @@ -583,10 +582,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, *p++ = xdr_zero; if (attr->ia_valid & ATTR_ATIME_SET) { - struct timespec ts; *p++ = xdr_two; - ts = timespec64_to_timespec(attr->ia_atime); - p = xdr_encode_nfstime3(p, &ts); + p = xdr_encode_nfstime3(p, &attr->ia_atime); } else if (attr->ia_valid & ATTR_ATIME) { *p++ = xdr_one; } else @@ -594,8 +591,7 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, if (attr->ia_valid & ATTR_MTIME_SET) { *p++ = xdr_two; - ts = timespec64_to_timespec(attr->ia_mtime); - xdr_encode_nfstime3(p, &ts); + xdr_encode_nfstime3(p, &attr->ia_mtime); } else if (attr->ia_valid & ATTR_MTIME) { *p = xdr_one; } else diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 901cca7542f9..c891af949886 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -13,8 +13,10 @@ #define PNFS_LAYOUTSTATS_MAXDEV (4) /* nfs4.2proc.c */ +#ifdef CONFIG_NFS_V4_2 int nfs42_proc_allocate(struct file *, loff_t, loff_t); -ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t); +ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t, + struct nl4_server *, nfs4_stateid *, bool); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, @@ -23,5 +25,16 @@ int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, const struct nfs42_layout_error *errors, size_t n); +int nfs42_proc_copy_notify(struct file *, struct file *, + struct nfs42_copy_notify_res *); +static inline bool nfs42_files_from_same_server(struct file *in, + struct file *out) +{ + struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client; + struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client; + return nfs4_check_serverowner_major_id(c_in->cl_serverowner, + c_out->cl_serverowner); +} +#endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 5196bfa7894d..1fe83e0f663e 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -3,6 +3,7 @@ * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com> */ #include <linux/fs.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/nfs.h> #include <linux/nfs3.h> @@ -15,10 +16,30 @@ #include "pnfs.h" #include "nfs4session.h" #include
"internal.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); +static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) +{ + struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client; + unsigned short port = 2049; + + rcu_read_lock(); + naddr->netid_len = scnprintf(naddr->netid, + sizeof(naddr->netid), "%s", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_NETID)); + naddr->addr_len = scnprintf(naddr->addr, + sizeof(naddr->addr), + "%s.%u.%u", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + port >> 8, port & 255); + rcu_read_unlock(); +} + static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, struct nfs_lock_context *lock, loff_t offset, loff_t len) { @@ -28,7 +49,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, .falloc_fh = NFS_FH(inode), .falloc_offset = offset, .falloc_length = len, - .falloc_bitmask = server->cache_consistency_bitmask, + .falloc_bitmask = nfs4_fattr_bitmap, }; struct nfs42_falloc_res res = { .falloc_server = server, @@ -132,22 +153,26 @@ out_unlock: } static int handle_async_copy(struct nfs42_copy_res *res, - struct nfs_server *server, + struct nfs_server *dst_server, + struct nfs_server *src_server, struct file *src, struct file *dst, - nfs4_stateid *src_stateid) + nfs4_stateid *src_stateid, + bool *restart) { struct nfs4_copy_state *copy, *tmp_copy; int status = NFS4_OK; bool found_pending = false; - struct nfs_open_context *ctx = nfs_file_open_context(dst); + struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); + struct nfs_open_context *src_ctx = nfs_file_open_context(src); copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); if (!copy) return -ENOMEM; - spin_lock(&server->nfs_client->cl_lock); - list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids, + spin_lock(&dst_server->nfs_client->cl_lock); + list_for_each_entry(tmp_copy, + &dst_server->nfs_client->pending_cb_stateids, copies) { if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, NFS4_STATEID_SIZE)) @@ -157,7 +182,7 @@ static int handle_async_copy(struct nfs42_copy_res *res, break; } if (found_pending) { - spin_unlock(&server->nfs_client->cl_lock); + spin_unlock(&dst_server->nfs_client->cl_lock); kfree(copy); copy = tmp_copy; goto out; @@ -165,19 +190,32 @@ static int handle_async_copy(struct nfs42_copy_res *res, memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE); init_completion(&copy->completion); - copy->parent_state = ctx->state; + copy->parent_dst_state = dst_ctx->state; + copy->parent_src_state = src_ctx->state; - list_add_tail(&copy->copies, &server->ss_copies); - spin_unlock(&server->nfs_client->cl_lock); + list_add_tail(&copy->copies, &dst_server->ss_copies); + spin_unlock(&dst_server->nfs_client->cl_lock); + + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_add_tail(&copy->src_copies, &src_server->ss_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } status = wait_for_completion_interruptible(&copy->completion); - spin_lock(&server->nfs_client->cl_lock); + spin_lock(&dst_server->nfs_client->cl_lock); list_del_init(&copy->copies); - spin_unlock(&server->nfs_client->cl_lock); + spin_unlock(&dst_server->nfs_client->cl_lock); + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_del_init(&copy->src_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } if (status == -ERESTARTSYS) { goto out_cancel; -
} else if (copy->flags) { + } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) { status = -EAGAIN; + *restart = true; goto out_cancel; } out: @@ -185,12 +223,14 @@ out: memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf)); status = -copy->error; +out_free: kfree(copy); return status; out_cancel: nfs42_do_offload_cancel_async(dst, &copy->stateid); - kfree(copy); - return status; + if (!nfs42_files_from_same_server(src, dst)) + nfs42_do_offload_cancel_async(src, src_stateid); + goto out_free; } static int process_copy_commit(struct file *dst, loff_t pos_dst, @@ -222,7 +262,10 @@ static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_lock_context *src_lock, struct file *dst, struct nfs_lock_context *dst_lock, struct nfs42_copy_args *args, - struct nfs42_copy_res *res) + struct nfs42_copy_res *res, + struct nl4_server *nss, + nfs4_stateid *cnr_stateid, + bool *restart) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], .rpc_argp = args, .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); - struct nfs_server *server = NFS_SERVER(dst_inode); + struct inode *src_inode = file_inode(src); + struct nfs_server *dst_server = NFS_SERVER(dst_inode); + struct nfs_server *src_server = NFS_SERVER(src_inode); loff_t pos_src = args->src_pos; loff_t pos_dst = args->dst_pos; size_t count = args->count; ssize_t status; - status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, - src_lock, FMODE_READ); - if (status) - return status; - + if (nss) { + args->cp_src = nss; + nfs4_stateid_copy(&args->src_stateid, cnr_stateid); + } else { + status = nfs4_set_rw_stateid(&args->src_stateid, + src_lock->open_context, src_lock, FMODE_READ); + if (status) + return status; + } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); if (status) @@ -262,13 +311,15 @@ static ssize_t _nfs42_proc_copy(struct file *src, if (!res->commit_res.verf) return -ENOMEM; } + set_bit(NFS_CLNT_SRC_SSC_COPY_STATE, + &src_lock->open_context->state->flags); set_bit(NFS_CLNT_DST_SSC_COPY_STATE, &dst_lock->open_context->state->flags); - status = nfs4_call_sync(server->client, server, &msg, + status = nfs4_call_sync(dst_server->client, dst_server, &msg, &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) - server->caps &= ~NFS_CAP_COPY; + dst_server->caps &= ~NFS_CAP_COPY; if (status) goto out; @@ -280,8 +331,8 @@ static ssize_t _nfs42_proc_copy(struct file *src, } if (!res->synchronous) { - status = handle_async_copy(res, server, src, dst, - &args->src_stateid); + status = handle_async_copy(res, dst_server, src_server, src, + dst, &args->src_stateid, restart); if (status) return status; } @@ -304,8 +355,9 @@ out: } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, - struct file *dst, loff_t pos_dst, - size_t count) + struct file *dst, loff_t pos_dst, size_t count, + struct nl4_server *nss, + nfs4_stateid *cnr_stateid, bool sync) { struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; @@ -316,7 +368,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, .dst_fh = NFS_FH(file_inode(dst)), .dst_pos = pos_dst, .count = count, - .sync = false, + .sync = sync, }; struct nfs42_copy_res res; struct nfs4_exception src_exception = { @@ -328,6 +380,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, .stateid = &args.dst_stateid, }; ssize_t err, err2; + bool restart = false; src_lock = nfs_get_lock_context(nfs_file_open_context(src));
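/*
 * Editor's sketch -- not part of the commit above. The hunk in
 * _nfs42_proc_copy() decides where the COPY source stateid comes from: an
 * inter-server copy (nss != NULL) must reuse the stateid returned by
 * COPY_NOTIFY, because the destination server will present it to the source
 * server, while an intra-server copy derives an ordinary read stateid from
 * the open/lock context. The helper below is hypothetical (name and shape
 * are illustrative) and only restates that branch; the calls it makes are
 * the same ones used in the patch.
 */
static int nfs42_sketch_pick_src_stateid(struct nfs42_copy_args *args,
					 struct nfs_lock_context *src_lock,
					 struct nl4_server *nss,
					 nfs4_stateid *cnr_stateid)
{
	if (nss) {
		/* inter-server: source address + COPY_NOTIFY stateid */
		args->cp_src = nss;
		nfs4_stateid_copy(&args->src_stateid, cnr_stateid);
		return 0;
	}
	/* intra-server: normal read stateid from the source open/lock context */
	return nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
				   src_lock, FMODE_READ);
}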
if (IS_ERR(src_lock)) @@ -347,21 +400,33 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, inode_lock(file_inode(dst)); err = _nfs42_proc_copy(src, src_lock, dst, dst_lock, - &args, &res); + &args, &res, + nss, cnr_stateid, &restart); inode_unlock(file_inode(dst)); if (err >= 0) break; - if (err == -ENOTSUPP) { + if (err == -ENOTSUPP && + nfs42_files_from_same_server(src, dst)) { err = -EOPNOTSUPP; break; } else if (err == -EAGAIN) { - dst_exception.retry = 1; - continue; + if (!restart) { + dst_exception.retry = 1; + continue; + } + break; } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) { args.sync = true; dst_exception.retry = 1; continue; + } else if ((err == -ESTALE || + err == -NFS4ERR_OFFLOAD_DENIED || + err == -ENOTSUPP) && + !nfs42_files_from_same_server(src, dst)) { + nfs42_do_offload_cancel_async(src, &args.src_stateid); + err = -EOPNOTSUPP; + break; } err2 = nfs4_handle_exception(server, err, &src_exception); @@ -459,6 +524,76 @@ static int nfs42_do_offload_cancel_async(struct file *dst, return status; } +static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, + struct nfs42_copy_notify_args *args, + struct nfs42_copy_notify_res *res) +{ + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY], + .rpc_argp = args, + .rpc_resp = res, + }; + int status; + struct nfs_open_context *ctx; + struct nfs_lock_context *l_ctx; + + ctx = get_nfs_open_context(nfs_file_open_context(src)); + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + return PTR_ERR(l_ctx); + + status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, + FMODE_READ); + nfs_put_lock_context(l_ctx); + if (status) + return status; + + status = nfs4_call_sync(src_server->client, src_server, &msg, + &args->cna_seq_args, &res->cnr_seq_res, 0); + if (status == -ENOTSUPP) + src_server->caps &= ~NFS_CAP_COPY_NOTIFY; + + put_nfs_open_context(nfs_file_open_context(src)); + return status; +} + +int nfs42_proc_copy_notify(struct file *src, struct file *dst, + struct nfs42_copy_notify_res *res) +{ + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); + struct nfs42_copy_notify_args *args; + struct nfs4_exception exception = { + .inode = file_inode(src), + }; + int status; + + if (!(src_server->caps & NFS_CAP_COPY_NOTIFY)) + return -EOPNOTSUPP; + + args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS); + if (args == NULL) + return -ENOMEM; + + args->cna_src_fh = NFS_FH(file_inode(src)), + args->cna_dst.nl4_type = NL4_NETADDR; + nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr); + exception.stateid = &args->cna_src_stateid; + + do { + status = _nfs42_proc_copy_notify(src, dst, args, res); + if (status == -ENOTSUPP) { + status = -EOPNOTSUPP; + goto out; + } + status = nfs4_handle_exception(src_server, status, &exception); + } while (exception.retry); + +out: + kfree(args); + return status; +} + static loff_t _nfs42_proc_llseek(struct file *filep, struct nfs_lock_context *lock, loff_t offset, int whence) { diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index aed865a84629..c03f3246d6c5 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -21,7 +21,10 @@ #define encode_copy_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ - 2 + 2 + 2 + 1 + 1 + 1) + 2 + 2 + 2 + 1 + 1 + 1 +\ + 1 + /* One cnr_source_server */\ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) #define decode_copy_maxsz (op_decode_hdr_maxsz + \ 
NFS42_WRITE_RES_SIZE + \ 1 /* cr_consecutive */ + \ @@ -29,6 +32,16 @@ #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) +#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) +#define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \ + 3 + /* cnr_lease_time */\ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 1 + /* Support 1 cnr_source_server */\ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) @@ -99,6 +112,12 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) +#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_copy_notify_maxsz) +#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_copy_notify_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -166,6 +185,26 @@ static void encode_allocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_nl4_server(struct xdr_stream *xdr, + const struct nl4_server *ns) +{ + encode_uint32(xdr, ns->nl4_type); + switch (ns->nl4_type) { + case NL4_NAME: + case NL4_URL: + encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str); + break; + case NL4_NETADDR: + encode_string(xdr, ns->u.nl4_addr.netid_len, + ns->u.nl4_addr.netid); + encode_string(xdr, ns->u.nl4_addr.addr_len, + ns->u.nl4_addr.addr); + break; + default: + WARN_ON_ONCE(1); + } +} + static void encode_copy(struct xdr_stream *xdr, const struct nfs42_copy_args *args, struct compound_hdr *hdr) @@ -180,7 +219,12 @@ static void encode_copy(struct xdr_stream *xdr, encode_uint32(xdr, 1); /* consecutive = true */ encode_uint32(xdr, args->sync); - encode_uint32(xdr, 0); /* src server list */ + if (args->cp_src == NULL) { /* intra-ssc */ + encode_uint32(xdr, 0); /* no src server list */ + return; + } + encode_uint32(xdr, 1); /* supporting 1 server */ + encode_nl4_server(xdr, args->cp_src); } static void encode_offload_cancel(struct xdr_stream *xdr, @@ -191,6 +235,15 @@ static void encode_offload_cancel(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->osa_stateid); } +static void encode_copy_notify(struct xdr_stream *xdr, + const struct nfs42_copy_notify_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->cna_src_stateid); + encode_nl4_server(xdr, &args->cna_dst); +} + static void encode_deallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args, struct compound_hdr *hdr) @@ -355,6 +408,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, } /* + * Encode COPY_NOTIFY request + */ +static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_copy_notify_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->cna_seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->cna_seq_args, &hdr); + encode_putfh(xdr, args->cna_src_fh, &hdr); + encode_copy_notify(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode DEALLOCATE request */ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, @@ -490,6 +562,58 @@ 
static int decode_write_response(struct xdr_stream *xdr, return decode_verifier(xdr, &res->verifier.verifier); } +static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns) +{ + struct nfs42_netaddr *naddr; + uint32_t dummy; + char *dummy_str; + __be32 *p; + int status; + + /* nl_type */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + ns->nl4_type = be32_to_cpup(p); + switch (ns->nl4_type) { + case NL4_NAME: + case NL4_URL: + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(&ns->u.nl4_str, dummy_str, dummy); + ns->u.nl4_str_sz = dummy; + break; + case NL4_NETADDR: + naddr = &ns->u.nl4_addr; + + /* netid string */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > RPCBIND_MAXNETIDLEN)) + return -EIO; + naddr->netid_len = dummy; + memcpy(naddr->netid, dummy_str, naddr->netid_len); + + /* uaddr string */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > RPCBIND_MAXUADDRLEN)) + return -EIO; + naddr->addr_len = dummy; + memcpy(naddr->addr, dummy_str, naddr->addr_len); + break; + default: + WARN_ON_ONCE(1); + return -EIO; + } + return 0; +} + static int decode_copy_requirements(struct xdr_stream *xdr, struct nfs42_copy_res *res) { __be32 *p; @@ -529,6 +653,42 @@ static int decode_offload_cancel(struct xdr_stream *xdr, return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); } +static int decode_copy_notify(struct xdr_stream *xdr, + struct nfs42_copy_notify_res *res) +{ + __be32 *p; + int status, count; + + status = decode_op_hdr(xdr, OP_COPY_NOTIFY); + if (status) + return status; + /* cnr_lease_time */ + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + return -EIO; + p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds); + res->cnr_lease_time.nseconds = be32_to_cpup(p); + + status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE); + if (unlikely(status)) + return -EIO; + + /* number of source addresses */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + count = be32_to_cpup(p); + if (count > 1) + pr_warn("NFS: %s: nsvr %d > Supported. 
Use first servers\n", + __func__, count); + + status = decode_nl4_server(xdr, &res->cnr_src); + if (unlikely(status)) + return -EIO; + return 0; +} + static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_DEALLOCATE); @@ -657,6 +817,32 @@ out: } /* + * Decode COPY_NOTIFY response + */ +static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_copy_notify_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->cnr_seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_copy_notify(xdr, res); + +out: + return status; +} + +/* * Decode DEALLOCATE request */ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 16b2e5cc3e94..a4520115d8a3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -168,6 +168,8 @@ enum { NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ #ifdef CONFIG_NFS_V4_2 NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/ + NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/ + NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */ #endif /* CONFIG_NFS_V4_2 */ }; @@ -311,6 +313,13 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, fmode_t fmode); +extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode); +extern int update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *deleg_stateid, + fmode_t fmode); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); @@ -445,6 +454,8 @@ extern void nfs4_set_lease_period(struct nfs_client *clp, /* nfs4state.c */ +extern const nfs4_stateid current_stateid; + const struct cred *nfs4_get_clid_cred(struct nfs_client *clp); const struct cred *nfs4_get_machine_cred(struct nfs_client *clp); const struct cred *nfs4_get_renew_cred(struct nfs_client *clp); @@ -457,6 +468,8 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, const struct cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_notify_server(struct nfs_client *); +bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { @@ -572,6 +585,12 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; } +static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) +{ + return nfs4_stateid_match_other(dst, src) && + !(src->seqid && nfs4_stateid_is_newer(dst, src)); +} + static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1) { u32 seqid = be32_to_cpu(s1->seqid); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index da6204025a2d..460d6251c405 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -629,7 +629,7 @@ out: /* * Returns true if the server major ids match */ -static bool +bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, struct nfs41_server_owner *o2) { @@ 
-879,14 +879,17 @@ static int nfs4_set_client(struct nfs_server *server, }; struct nfs_client *clp; - if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP) + if (minorversion == 0) + __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); + else if (proto == XPRT_TRANSPORT_TCP) cl_init.nconnect = nconnect; + if (server->flags & NFS_MOUNT_NORESVPORT) - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); if (server->options & NFS_OPTION_MIGRATION) - set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); + __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) - set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); + __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); server->port = rpc_get_port(addr); /* Allocate or find a client reference we can use */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 339663d04bf8..620de905cba9 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,14 +133,55 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + struct nfs42_copy_notify_res *cn_resp = NULL; + struct nl4_server *nss = NULL; + nfs4_stateid *cnrs = NULL; + ssize_t ret; + bool sync = false; + /* Only offload copy if superblock is the same */ - if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + if (file_in->f_op != &nfs4_file_operations) return -EXDEV; if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; - return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); + /* if the copy size is smaller than 2 RPC payloads, make it + * synchronous + */ + if (count <= 2 * NFS_SERVER(file_inode(file_in))->rsize) + sync = true; +retry: + if (!nfs42_files_from_same_server(file_in, file_out)) { + /* for inter copy, if copy size is smaller than 12 RPC + * payloads, fall back to traditional copy. There are + * 14 RPCs during an NFSv4.x mount between source/dest + * servers. + */ + if (sync || + count <= 14 * NFS_SERVER(file_inode(file_in))->rsize) + return -EOPNOTSUPP; + cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), + GFP_NOFS); + if (unlikely(cn_resp == NULL)) + return -ENOMEM; + + ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp); + if (ret) { + ret = -EOPNOTSUPP; + goto out; + } + nss = &cn_resp->cnr_src; + cnrs = &cn_resp->cnr_stateid; + } + ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count, + nss, cnrs, sync); +out: + if (!nfs42_files_from_same_server(file_in, file_out)) + kfree(cn_resp); + if (ret == -EAGAIN) + goto retry; + return ret; } static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, @@ -263,6 +304,102 @@ out_unlock: out: return ret < 0 ?
ret : count; } + +static int read_name_gen = 1; +#define SSC_READ_NAME_BODY "ssc_read_%d" + +struct file * +nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, + nfs4_stateid *stateid) +{ + struct nfs_fattr fattr; + struct file *filep, *res; + struct nfs_server *server; + struct inode *r_ino = NULL; + struct nfs_open_context *ctx; + struct nfs4_state_owner *sp; + char *read_name = NULL; + int len, status = 0; + + server = NFS_SERVER(ss_mnt->mnt_root->d_inode); + + nfs_fattr_init(&fattr); + + status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); + if (status < 0) { + res = ERR_PTR(status); + goto out; + } + + res = ERR_PTR(-ENOMEM); + len = strlen(SSC_READ_NAME_BODY) + 16; + read_name = kzalloc(len, GFP_NOFS); + if (read_name == NULL) + goto out; + snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); + + r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, + NULL); + if (IS_ERR(r_ino)) { + res = ERR_CAST(r_ino); + goto out_free_name; + } + + filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ, + r_ino->i_fop); + if (IS_ERR(filep)) { + res = ERR_CAST(filep); + goto out_free_name; + } + filep->f_mode |= FMODE_READ; + + ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode, + filep); + if (IS_ERR(ctx)) { + res = ERR_CAST(ctx); + goto out_filep; + } + + res = ERR_PTR(-EINVAL); + sp = nfs4_get_state_owner(server, ctx->cred, GFP_KERNEL); + if (sp == NULL) + goto out_ctx; + + ctx->state = nfs4_get_open_state(r_ino, sp); + if (ctx->state == NULL) + goto out_stateowner; + + set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags); + set_bit(NFS_OPEN_STATE, &ctx->state->flags); + memcpy(&ctx->state->open_stateid.other, &stateid->other, + NFS4_STATEID_OTHER_SIZE); + update_open_stateid(ctx->state, stateid, NULL, filep->f_mode); + + nfs_file_set_open_context(filep, ctx); + put_nfs_open_context(ctx); + + file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping); + res = filep; +out_free_name: + kfree(read_name); +out: + return res; +out_stateowner: + nfs4_put_state_owner(sp); +out_ctx: + put_nfs_open_context(ctx); +out_filep: + fput(filep); + goto out_free_name; +} +EXPORT_SYMBOL_GPL(nfs42_ssc_open); +void nfs42_ssc_close(struct file *filep) +{ + struct nfs_open_context *ctx = nfs_file_open_context(filep); + + ctx->state->flags = 0; +} +EXPORT_SYMBOL_GPL(nfs42_ssc_close); #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab8ca20fd579..76d37161409a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -91,7 +91,6 @@ struct nfs4_opendata; static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, @@ -476,6 +475,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_PARTNER_NO_AUTH: if (inode != NULL && stateid != NULL) { nfs_inode_find_state_and_recover(inode, stateid); @@ -521,9 
+521,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_DEADSESSION: case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR: %d Reset session\n", __func__, - errorcode); - nfs4_schedule_session_recovery(clp->cl_session, errorcode); + /* Handled in nfs41_sequence_process() */ goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: @@ -782,6 +780,7 @@ static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_session *session; struct nfs4_slot *slot = res->sr_slot; struct nfs_client *clp; + int status; int ret = 1; if (slot == NULL) @@ -793,8 +792,13 @@ static int nfs41_sequence_process(struct rpc_task *task, session = slot->table->session; trace_nfs4_sequence_done(session, res); + + status = res->sr_status; + if (task->tk_status == -NFS4ERR_DEADSESSION) + status = -NFS4ERR_DEADSESSION; + /* Check the SEQUENCE operation status */ - switch (res->sr_status) { + switch (status) { case 0: /* Mark this sequence number as having been acked */ nfs4_slot_sequence_acked(slot, slot->seq_nr); @@ -866,6 +870,10 @@ static int nfs41_sequence_process(struct rpc_task *task, */ slot->seq_nr = slot->seq_nr_highest_sent; goto out_retry; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; @@ -876,8 +884,10 @@ out: out_noaction: return ret; session_recover: - nfs4_schedule_session_recovery(session, res->sr_status); - goto retry_nowait; + nfs4_schedule_session_recovery(session, status); + dprintk("%s ERROR: %d Reset session\n", __func__, status); + nfs41_sequence_free_slot(res); + goto out; retry_new_seq: ++slot->seq_nr; retry_nowait: @@ -1440,8 +1450,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode, return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) - return 0; switch (claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_FH: @@ -1718,7 +1726,7 @@ static void nfs_state_clear_delegation(struct nfs4_state *state) write_sequnlock(&state->seqlock); } -static int update_open_stateid(struct nfs4_state *state, +int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, const nfs4_stateid *delegation, fmode_t fmode) @@ -1739,7 +1747,7 @@ static int update_open_stateid(struct nfs4_state *state, ret = 1; } - deleg_cur = rcu_dereference(nfsi->delegation); + deleg_cur = nfs4_get_valid_delegation(state->inode); if (deleg_cur == NULL) goto no_delegation; @@ -1751,7 +1759,7 @@ static int update_open_stateid(struct nfs4_state *state, if (delegation == NULL) delegation = &deleg_cur->stateid; - else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) + else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation)) goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); @@ -1798,7 +1806,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo fmode &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); + delegation = nfs4_get_valid_delegation(inode); if (delegation == NULL || (delegation->type & fmode) == fmode) { rcu_read_unlock(); return; @@ -1810,7 +1818,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; 
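/*
 * Editor's note -- not part of the commit above. Several hunks in this
 * series replace nfs4_stateid_match()/nfs4_stateid_match_other() with
 * nfs4_stateid_match_or_older(), whose seqid test relies on serial-number
 * arithmetic so the comparison still holds after the 32-bit seqid wraps.
 * A minimal standalone illustration with made-up values (host byte order,
 * unlike the on-the-wire big-endian seqids):
 */
static inline bool sketch_seqid_is_newer(u32 s1, u32 s2)
{
	/* the same test nfs4_stateid_is_newer() applies after be32_to_cpu() */
	return (s32)(s1 - s2) > 0;
}
/*
 * sketch_seqid_is_newer(2, 1)          -> true
 * sketch_seqid_is_newer(1, 0xffffffff) -> true, since (s32)(1 - 0xffffffff)
 *     == 2 > 0: a seqid that wrapped past zero still counts as newer than
 *     one just below the wrap point.
 */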
-	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *delegation;
 	int open_mode = opendata->o_arg.open_flags;
 	fmode_t fmode = opendata->o_arg.fmode;
@@ -1827,7 +1834,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		}
 		spin_unlock(&state->owner->so_lock);
 		rcu_read_lock();
-		delegation = rcu_dereference(nfsi->delegation);
+		delegation = nfs4_get_valid_delegation(state->inode);
 		if (!can_open_delegated(delegation, fmode, claim)) {
 			rcu_read_unlock();
 			break;
@@ -2191,7 +2198,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
 	case -NFS4ERR_BAD_HIGH_SLOT:
 	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 	case -NFS4ERR_DEADSESSION:
-		nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
 		return -EAGAIN;
 	case -NFS4ERR_STALE_CLIENTID:
 	case -NFS4ERR_STALE_STATEID:
@@ -2371,7 +2377,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 				data->o_arg.open_flags, claim))
 			goto out_no_action;
 		rcu_read_lock();
-		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+		delegation = nfs4_get_valid_delegation(data->state->inode);
 		if (can_open_delegated(delegation, data->o_arg.fmode, claim))
 			goto unlock_no_action;
 		rcu_read_unlock();
@@ -4065,7 +4071,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
-static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 				struct nfs_fattr *fattr, struct nfs4_label *label,
 				struct inode *inode)
 {
@@ -5101,12 +5107,12 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
 		const struct nfs_lock_context *l_ctx,
 		fmode_t fmode)
 {
-	nfs4_stateid current_stateid;
+	nfs4_stateid _current_stateid;
 
 	/* If the current stateid represents a lost lock, then exit */
-	if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+	if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO)
 		return true;
-	return nfs4_stateid_match(stateid, &current_stateid);
+	return nfs4_stateid_match(stateid, &_current_stateid);
 }
 
 static bool nfs4_error_stateid_expired(int err)
@@ -6199,10 +6205,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		task->tk_status = 0;
 		break;
 	case -NFS4ERR_OLD_STATEID:
-		if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
-			goto out_restart;
-		task->tk_status = 0;
-		break;
+		if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
+			nfs4_stateid_seqid_inc(&data->stateid);
+		if (data->args.bitmask) {
+			data->args.bitmask = NULL;
+			data->res.fattr = NULL;
+		}
+		goto out_restart;
 	case -NFS4ERR_ACCESS:
 		if (data->args.bitmask) {
 			data->args.bitmask = NULL;
@@ -6217,6 +6226,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		if (exception.retry)
 			goto out_restart;
 	}
+	nfs_delegation_mark_returned(data->inode, data->args.stateid);
 	data->rpc_status = task->tk_status;
 	return;
 out_restart:
@@ -6246,8 +6256,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 
 	d_data = (struct nfs4_delegreturndata *)data;
 
-	if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
+	if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
+		nfs4_sequence_done(task, &d_data->res.seq_res);
 		return;
+	}
 
 	lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
 	if (lo && !pnfs_layout_is_valid(lo)) {
@@ -7823,6 +7835,15 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
 static void
 nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
 {
+	struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp;
+	struct nfs_client *clp = args->client;
+
+	switch (task->tk_status) {
+	case -NFS4ERR_BADSESSION:
+	case -NFS4ERR_DEADSESSION:
+		nfs4_schedule_session_recovery(clp->cl_session,
+				task->tk_status);
+	}
 }
 
 static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
@@ -8870,8 +8891,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_DEADSESSION:
 	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
-		nfs4_schedule_session_recovery(clp->cl_session,
-				task->tk_status);
 		break;
 	default:
 		nfs4_schedule_lease_recovery(clp);
@@ -9900,6 +9919,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_ALLOCATE
 		| NFS_CAP_COPY
 		| NFS_CAP_OFFLOAD_CANCEL
+		| NFS_CAP_COPY_NOTIFY
 		| NFS_CAP_DEALLOCATE
 		| NFS_CAP_SEEK
 		| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0c6d53dc3672..ba97b9cf632c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1407,7 +1407,7 @@ nfs_state_find_lock_state_by_stateid(struct nfs4_state *state,
 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
 		if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
 			continue;
-		if (nfs4_stateid_match_other(&pos->ls_stateid, stateid))
+		if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
 			return pos;
 	}
 	return NULL;
@@ -1441,12 +1441,13 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
 		state = ctx->state;
 		if (state == NULL)
 			continue;
-		if (nfs4_stateid_match_other(&state->stateid, stateid) &&
+		if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
 		    nfs4_state_mark_reclaim_nograce(clp, state)) {
 			found = true;
 			continue;
 		}
-		if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+		if (test_bit(NFS_OPEN_STATE, &state->flags) &&
+		    nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
 		    nfs4_state_mark_reclaim_nograce(clp, state)) {
 			found = true;
 			continue;
@@ -1556,16 +1557,32 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
 {
 	struct nfs4_copy_state *copy;
 
-	if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+	if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+	    !test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
 		return;
 
 	spin_lock(&sp->so_server->nfs_client->cl_lock);
 	list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
-		if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
-			continue;
+		if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+				!nfs4_stateid_match_other(&state->stateid,
+				&copy->parent_dst_state->stateid)))
+			continue;
 		copy->flags = 1;
-		complete(&copy->completion);
-		break;
+		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+				&state->flags)) {
+			clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
+			complete(&copy->completion);
+		}
+	}
+	list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+		if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
+				!nfs4_stateid_match_other(&state->stateid,
+				&copy->parent_src_state->stateid)))
+			continue;
+		copy->flags = 1;
+		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+				&state->flags))
+			complete(&copy->completion);
 	}
 	spin_unlock(&sp->so_server->nfs_client->cl_lock);
 }
@@ -1609,6 +1626,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
 	struct nfs4_state *state;
 	unsigned int loop = 0;
 	int status = 0;
+#ifdef CONFIG_NFS_V4_2
+	bool found_ssc_copy_state = false;
+#endif /* CONFIG_NFS_V4_2 */
 
 	/* Note: we rely on the sp->so_states list being ordered
 	 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
@@ -1628,6 +1648,13 @@ restart:
 			continue;
 		if (state->state == 0)
 			continue;
+#ifdef CONFIG_NFS_V4_2
+		if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
+			nfs4_state_mark_recovery_failed(state, -EIO);
+			found_ssc_copy_state = true;
+			continue;
+		}
+#endif /* CONFIG_NFS_V4_2 */
 		refcount_inc(&state->count);
 		spin_unlock(&sp->so_lock);
 		status = __nfs4_reclaim_open_state(sp, state, ops);
@@ -1682,6 +1709,10 @@ restart:
 	}
 	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
 	spin_unlock(&sp->so_lock);
+#ifdef CONFIG_NFS_V4_2
+	if (found_ssc_copy_state)
+		return -EIO;
+#endif /* CONFIG_NFS_V4_2 */
 	return 0;
 out_err:
 	nfs4_put_open_state(state);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 04c57066a11a..2c9cbade561a 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -92,8 +92,8 @@ static void nfs4_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
-	/* If we are holding a delegation, return it! */
-	nfs_inode_return_delegation_noreclaim(inode);
+	/* If we are holding a delegation, return and free it */
+	nfs_inode_evict_delegation(inode);
 	/* Note that above delegreturn would trigger pnfs return-on-close */
 	pnfs_return_layout(inode);
 	pnfs_destroy_layout(NFS_I(inode));
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ab07db0f07cd..936c57779ff4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1059,7 +1059,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 }
 
 static __be32 *
-xdr_encode_nfstime4(__be32 *p, const struct timespec *t)
+xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
 {
 	p = xdr_encode_hyper(p, (__s64)t->tv_sec);
 	*p++ = cpu_to_be32(t->tv_nsec);
@@ -1072,7 +1072,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		const struct nfs_server *server,
 		const uint32_t attrmask[])
 {
-	struct timespec ts;
 	char owner_name[IDMAP_NAMESZ];
 	char owner_group[IDMAP_NAMESZ];
 	int owner_namelen = 0;
@@ -1161,16 +1160,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
 		if (iap->ia_valid & ATTR_ATIME_SET) {
 			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-			ts = timespec64_to_timespec(iap->ia_atime);
-			p = xdr_encode_nfstime4(p, &ts);
+			p = xdr_encode_nfstime4(p, &iap->ia_atime);
 		} else
 			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
 	}
 	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
 		if (iap->ia_valid & ATTR_MTIME_SET) {
 			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-			ts = timespec64_to_timespec(iap->ia_mtime);
-			p = xdr_encode_nfstime4(p, &ts);
+			p = xdr_encode_nfstime4(p, &iap->ia_mtime);
 		} else
 			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
 	}
@@ -4065,17 +4062,17 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
 }
 
 static __be32 *
-xdr_decode_nfstime4(__be32 *p, struct timespec *t)
+xdr_decode_nfstime4(__be32 *p, struct timespec64 *t)
 {
 	__u64 sec;
 
 	p = xdr_decode_hyper(p, &sec);
-	t->tv_sec = (time_t)sec;
+	t->tv_sec = sec;
 	t->tv_nsec = be32_to_cpup(p++);
 	return p;
 }
 
-static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
+static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time)
 {
 	__be32 *p;
 
@@ -4086,7 +4083,7 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
 	return 0;
 }
 
-static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
 {
 	int status = 0;
 
@@ -4104,7 +4101,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
 	return status;
 }
 
-static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
 {
 	int status = 0;
 
@@ -4123,7 +4120,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
 }
 
 static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
-				  struct timespec *time)
+				  struct timespec64 *time)
 {
 	int status = 0;
 
@@ -4186,7 +4183,7 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
 	return status;
 }
 
-static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
 {
 	int status = 0;
 
@@ -7581,6 +7578,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC42(CLONE,		enc_clone,		dec_clone),
 	PROC42(COPY,		enc_copy,		dec_copy),
 	PROC42(OFFLOAD_CANCEL,	enc_offload_cancel,	dec_offload_cancel),
+	PROC42(COPY_NOTIFY,	enc_copy_notify,	dec_copy_notify),
 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 	PROC42(LAYOUTERROR,	enc_layouterror,	dec_layouterror),
 };
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 361cc10d6f95..f64a33d2a1d1 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -1065,6 +1065,39 @@ TRACE_EVENT(nfs_commit_done,
 		)
 );
 
+TRACE_EVENT(nfs_fh_to_dentry,
+		TP_PROTO(
+			const struct super_block *sb,
+			const struct nfs_fh *fh,
+			u64 fileid,
+			int error
+		),
+
+		TP_ARGS(sb, fh, fileid, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+		),
+
+		TP_fast_assign(
+			__entry->error = error;
+			__entry->dev = sb->s_dev;
+			__entry->fileid = fileid;
+			__entry->fhandle = nfs_fhandle_hash(fh);
+		),
+
+		TP_printk(
+			"error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+
 TRACE_DEFINE_ENUM(NFS_OK);
 TRACE_DEFINE_ENUM(NFSERR_PERM);
 TRACE_DEFINE_ENUM(NFSERR_NOENT);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bb80034a7661..cec3070ab577 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2160,8 +2160,6 @@ out_unlock:
 	return NULL;
 }
 
-extern const nfs4_stateid current_stateid;
-
 static void _lgopen_prepare_attached(struct nfs4_opendata *data,
 				     struct nfs_open_context *ctx)
 {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a84df7d63403..8d8d04bb9d64 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1592,7 +1592,7 @@ static int nfs_parse_mount_options(char *raw,
 				dfprintk(MOUNT, "NFS: invalid "
 						"lookupcache argument\n");
 				return 0;
-			};
+			}
 			break;
 		case Opt_fscache_uniq:
 			if (nfs_get_option_str(args, &mnt->fscache_uniq))
@@ -1625,7 +1625,7 @@ static int nfs_parse_mount_options(char *raw,
 				dfprintk(MOUNT, "NFS: invalid "
 						"local_lock argument\n");
 				return 0;
-			};
+			}
 			break;
 
 		/*
@@ -2585,7 +2585,7 @@ static void nfs_get_cache_cookie(struct super_block *sb,
 		if (mnt_s->fscache_key) {
 			uniq = mnt_s->fscache_key->key.uniquifier;
 			ulen = mnt_s->fscache_key->key.uniq_len;
-		};
+		}
 	} else
 		return;
 
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 4f3390b20239..c489496b5659 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -121,8 +121,7 @@ static void nfs_netns_client_release(struct kobject *kobj)
 			struct nfs_netns_client,
 			kobject);
 
-	if (c->identifier)
-		kfree(c->identifier);
+	kfree(c->identifier);
 	kfree(c);
 }