summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/block-group.c1
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delalloc-space.c7
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/file.c43
-rw-r--r--fs/btrfs/inode-map.c4
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/cifs/cifsglob.h5
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/connect.c18
-rw-r--r--fs/cifs/file.c29
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/smb1ops.c3
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/transport.c42
-rw-r--r--fs/dax.c5
-rw-r--r--fs/fuse/Makefile3
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c16
-rw-r--r--fs/fuse/file.c14
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/fuse/virtio_fs.c169
-rw-r--r--fs/gfs2/ops_fstype.c21
-rw-r--r--fs/io_uring.c221
-rw-r--r--fs/lockd/host.c3
-rw-r--r--fs/nfs/callback.h4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/delegation.c170
-rw-r--r--fs/nfs/delegation.h4
-rw-r--r--fs/nfs/export.c1
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/inode.c54
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/namespace.c3
-rw-r--r--fs/nfs/nfs2xdr.c21
-rw-r--r--fs/nfs/nfs3client.c5
-rw-r--r--fs/nfs/nfs3xdr.c14
-rw-r--r--fs/nfs/nfs42.h15
-rw-r--r--fs/nfs/nfs42proc.c201
-rw-r--r--fs/nfs/nfs42xdr.c190
-rw-r--r--fs/nfs/nfs4_fs.h19
-rw-r--r--fs/nfs/nfs4client.c13
-rw-r--r--fs/nfs/nfs4file.c141
-rw-r--r--fs/nfs/nfs4proc.c76
-rw-r--r--fs/nfs/nfs4state.c47
-rw-r--r--fs/nfs/nfs4super.c4
-rw-r--r--fs/nfs/nfs4xdr.c24
-rw-r--r--fs/nfs/nfstrace.h33
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfs/sysfs.c3
59 files changed, 1276 insertions, 479 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index bf7e3f23bba7..670700cb1110 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1761,6 +1761,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_err(info,
"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
cache->key.objectid);
+ btrfs_put_block_group(cache);
ret = -EINVAL;
goto error;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 19d669d12ca1..fe2b8765d9e6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -734,8 +734,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
- /* the extent workers do delayed refs on the extent allocation tree */
- struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
u32 thread_pool_size;
@@ -2489,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index d949d7d2abed..db9f2c58eb4a 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -381,7 +381,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
out_qgroup:
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
out_fail:
- btrfs_inode_rsv_release(inode, true);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return ret;
@@ -418,7 +417,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
* btrfs_delalloc_release_extents - release our outstanding_extents
* @inode: the inode to balance the reservation for.
* @num_bytes: the number of bytes we originally reserved with
- * @qgroup_free: do we need to free qgroup meta reservation or convert them.
*
* When we reserve space we increase outstanding_extents for the extents we may
* add. Once we've set the range as delalloc or created our ordered extents we
@@ -426,8 +424,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
* temporarily tracked outstanding_extents. This _must_ be used in conjunction
* with btrfs_delalloc_reserve_metadata.
*/
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned num_extents;
@@ -441,7 +438,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
if (btrfs_is_testing(fs_info))
return;
- btrfs_inode_rsv_release(inode, qgroup_free);
+ btrfs_inode_rsv_release(inode, true);
}
/**
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 044981cf6df9..402b61bf345c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2008,7 +2008,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
- btrfs_destroy_workqueue(fs_info->extent_workers);
/*
* Now that all other work queues are destroyed, we can safely destroy
* the queues used for metadata I/O, since tasks from those other work
@@ -2214,10 +2213,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
- fs_info->extent_workers =
- btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
- min_t(u64, fs_devices->num_devices,
- max_active), 8);
if (!(fs_info->workers && fs_info->delalloc_workers &&
fs_info->submit_workers && fs_info->flush_workers &&
@@ -2228,7 +2223,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
- fs_info->extent_workers &&
fs_info->qgroup_rescan_workers)) {
return -ENOMEM;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 27e5b269e729..435a502a3226 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1692,7 +1692,7 @@ again:
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes, true);
+ reserve_bytes);
break;
}
@@ -1704,7 +1704,7 @@ again:
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes, true);
+ reserve_bytes);
ret = extents_locked;
break;
}
@@ -1772,8 +1772,7 @@ again:
else
free_extent_state(cached_state);
- btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
- true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
@@ -2068,25 +2067,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
- u64 len;
- /*
- * If the inode needs a full sync, make sure we use a full range to
- * avoid log tree corruption, due to hole detection racing with ordered
- * extent completion for adjacent ranges, and assertion failures during
- * hole detection.
- */
- if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
- start = 0;
- end = LLONG_MAX;
- }
-
- /*
- * The range length can be represented by u64, we have to do the typecasts
- * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
- */
- len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
btrfs_init_log_ctx(&ctx, inode);
@@ -2113,6 +2094,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
+ * If the inode needs a full sync, make sure we use a full range to
+ * avoid log tree corruption, due to hole detection racing with ordered
+ * extent completion for adjacent ranges, and assertion failures during
+ * hole detection. Do this while holding the inode lock, to avoid races
+ * with other tasks.
+ */
+ if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags)) {
+ start = 0;
+ end = LLONG_MAX;
+ }
+
+ /*
* Before we acquired the inode's lock, someone may have dirtied more
* pages in the target range. We need to make sure that writeback for
* any such pages does not start while we are logging the inode, because
@@ -2139,8 +2133,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/*
* We have to do this here to avoid the priority inversion of waiting on
* IO of a lower priority task while holding a transaction open.
+ *
+ * Also, the range length can be represented by u64, we have to do the
+ * typecasts to avoid signed overflow if it's [0, LLONG_MAX].
*/
- ret = btrfs_wait_ordered_range(inode, start, len);
+ ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1);
if (ret) {
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 63cad7865d75..37345fb6191d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -501,13 +501,13 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
iput(inode);
out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0f2754eaa05b..c3f386b7cc0b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2206,7 +2206,7 @@ again:
ClearPageChecked(page);
set_page_dirty(page);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
@@ -4951,7 +4951,7 @@ again:
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
ret = -ENOMEM;
goto out;
}
@@ -5018,7 +5018,7 @@ out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page);
put_page(page);
out:
@@ -8709,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count);
}
out:
if (wakeup)
@@ -9059,7 +9059,7 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
if (!ret2) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
@@ -9068,7 +9068,7 @@ again:
out_unlock:
unlock_page(page);
out:
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space, (ret != 0));
out_noreserve:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index de730e56d3f5..7c145a41decd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1360,8 +1360,7 @@ again:
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
- false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
out:
@@ -1372,8 +1371,7 @@ out:
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
- true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return ret;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c4bb69941c77..3ad151655eb8 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3629,7 +3629,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
+ trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
ret = qgroup_reserve(root, num_bytes, enforce, type);
if (ret < 0)
return ret;
@@ -3676,7 +3676,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
*/
num_bytes = sub_root_meta_rsv(root, num_bytes, type);
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
+ trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
num_bytes, type);
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 00504657b602..5cd42b66818c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3277,6 +3277,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!page) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
ret = -ENOMEM;
goto out;
}
@@ -3297,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE, true);
+ PAGE_SIZE);
ret = -EIO;
goto out;
}
@@ -3326,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE, true);
+ PAGE_SIZE);
clear_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end,
@@ -3342,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page);
index++;
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
- false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info);
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c049c7b3aa87..1a135d1b85bd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -169,7 +169,13 @@ cifs_read_super(struct super_block *sb)
else
sb->s_maxbytes = MAX_NON_LFS;
- /* Some very old servers like DOS and OS/2 used 2 second granularity */
+ /*
+ * Some very old servers like DOS and OS/2 used 2 second granularity
+ * (while all current servers use 100ns granularity - see MS-DTYP)
+ * but 1 second is the maximum allowed granularity for the VFS
+ * so for old servers set time granularity to 1 second while for
+ * everything else (current servers) set it to 100ns.
+ */
if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) &&
((tcon->ses->capabilities &
tcon->ses->server->vals->cap_nt_find) == 0) &&
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 50dfd9049370..d78bfcc19156 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1391,6 +1391,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
struct cifsInodeInfo {
bool can_cache_brlcks;
struct list_head llist; /* locks helb by this inode */
+ /*
+ * NOTE: Some code paths call down_read(lock_sem) twice, so
+ * we must always use use cifs_down_write() instead of down_write()
+ * for this semaphore to avoid deadlocks.
+ */
struct rw_semaphore lock_sem; /* protect the fields above */
/* BB add in lists for dirty pages i.e. write caching info for oplock */
struct list_head openFileList;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e53e9f62b87b..fe597d3d5208 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -170,6 +170,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile,
struct file_lock *flock, const unsigned int xid);
extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
+extern void cifs_down_write(struct rw_semaphore *sem);
extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
struct file *file,
struct tcon_link *tlink,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a64dfa95a925..ccaa8bad336f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
spin_lock(&GlobalMid_Lock);
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+ kref_get(&mid_entry->refcount);
if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
mid_entry->mid_state = MID_RETRY_NEEDED;
list_move(&mid_entry->qhead, &retry_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
mutex_unlock(&server->srv_mutex);
@@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
if (cifs_rdma_enabled(server)) {
@@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
if (mid->mid_flags & MID_DELETED)
printk_once(KERN_WARNING
"trying to dequeue a deleted mid\n");
- else
+ else {
list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
}
@@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
+ kref_get(&mid_entry->refcount);
mid_entry->mid_state = MID_SHUTDOWN;
list_move(&mid_entry->qhead, &dispose_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
@@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
/* 1/8th of sec is more than enough time for them to exit */
msleep(125);
@@ -3882,8 +3890,12 @@ generic_ip_connect(struct TCP_Server_Info *server)
rc = socket->ops->connect(socket, saddr, slen,
server->noblockcnt ? O_NONBLOCK : 0);
-
- if (rc == -EINPROGRESS)
+ /*
+ * When mounting SMB root file systems, we do not want to block in
+ * connect. Otherwise bail out and then let cifs_reconnect() perform
+ * reconnect failover - if possible.
+ */
+ if (server->noblockcnt && rc == -EINPROGRESS)
rc = 0;
if (rc < 0) {
cifs_dbg(FYI, "Error %d connecting to server\n", rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5ad15de2bb4f..fa7b0fa72bb3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -281,6 +281,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode)
return has_locks;
}
+void
+cifs_down_write(struct rw_semaphore *sem)
+{
+ while (!down_write_trylock(sem))
+ msleep(10);
+}
+
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock)
@@ -306,7 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
INIT_LIST_HEAD(&fdlocks->locks);
fdlocks->cfile = cfile;
cfile->llist = fdlocks;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_add(&fdlocks->llist, &cinode->llist);
up_write(&cinode->lock_sem);
@@ -405,10 +412,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
bool oplock_break_cancelled;
spin_lock(&tcon->open_file_lock);
-
+ spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
+ spin_unlock(&cifsi->open_file_lock);
spin_unlock(&tcon->open_file_lock);
return;
}
@@ -421,9 +429,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
/* remove it from the lists */
- spin_lock(&cifsi->open_file_lock);
list_del(&cifs_file->flist);
- spin_unlock(&cifsi->open_file_lock);
list_del(&cifs_file->tlist);
atomic_dec(&tcon->num_local_opens);
@@ -440,6 +446,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
cifs_set_oplock_level(cifsi, 0);
}
+ spin_unlock(&cifsi->open_file_lock);
spin_unlock(&tcon->open_file_lock);
oplock_break_cancelled = wait_oplock_handler ?
@@ -464,7 +471,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
*/
- down_write(&cifsi->lock_sem);
+ cifs_down_write(&cifsi->lock_sem);
list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
list_del(&li->llist);
cifs_del_lock_waiters(li);
@@ -1027,7 +1034,7 @@ static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_add_tail(&lock->llist, &cfile->llist->locks);
up_write(&cinode->lock_sem);
}
@@ -1049,7 +1056,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
try_again:
exist = false;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
lock->type, lock->flags, &conf_lock,
@@ -1072,7 +1079,7 @@ try_again:
(lock->blist.next == &lock->blist));
if (!rc)
goto try_again;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_del_init(&lock->blist);
}
@@ -1125,7 +1132,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
return rc;
try_again:
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
if (!cinode->can_cache_brlcks) {
up_write(&cinode->lock_sem);
return rc;
@@ -1331,7 +1338,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
int rc = 0;
/* we are going to update can_cache_brlcks here - need a write access */
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
if (!cinode->can_cache_brlcks) {
up_write(&cinode->lock_sem);
return rc;
@@ -1522,7 +1529,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
if (!buf)
return -ENOMEM;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
for (i = 0; i < 2; i++) {
cur = buf;
num = 0;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 5dcc95b38310..df9377828e2f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2475,9 +2475,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
cifsFileInfo_put(wfile);
if (rc)
- return rc;
+ goto cifs_setattr_exit;
} else if (rc != -EBADF)
- return rc;
+ goto cifs_setattr_exit;
else
rc = 0;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index b7421a096319..514810694c0f 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -171,6 +171,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
/* we do not want to loop forever */
last_mid = cur_mid;
cur_mid++;
+ /* avoid 0xFFFF MID */
+ if (cur_mid == 0xffff)
+ cur_mid++;
/*
* This nested loop looks more expensive than it is.
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index e6a1fc72018f..8b0b512c5792 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -145,7 +145,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
cur = buf;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
if (flock->fl_start > li->offset ||
(flock->fl_start + length) <
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 4c0922596467..cd55af9b7cc5 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4084,6 +4084,7 @@ free_pages:
kfree(dw->ppages);
cifs_small_buf_release(dw->buf);
+ kfree(dw);
}
@@ -4157,7 +4158,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
dw->server = server;
dw->ppages = pages;
dw->len = len;
- queue_work(cifsiod_wq, &dw->decrypt);
+ queue_work(decrypt_wq, &dw->decrypt);
*num_mids = 0; /* worker thread takes care of finding mid */
return -1;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 308ad0f495e1..ca3de62688d6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
static void _cifs_mid_q_entry_release(struct kref *refcount)
{
- struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
- refcount);
-
- mempool_free(mid, cifs_mid_poolp);
-}
-
-void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
-{
- spin_lock(&GlobalMid_Lock);
- kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
- spin_unlock(&GlobalMid_Lock);
-}
-
-void
-DeleteMidQEntry(struct mid_q_entry *midEntry)
-{
+ struct mid_q_entry *midEntry =
+ container_of(refcount, struct mid_q_entry, refcount);
#ifdef CONFIG_CIFS_STATS2
__le16 command = midEntry->server->vals->lock_cmd;
__u16 smb_cmd = le16_to_cpu(midEntry->command);
@@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
}
}
#endif
+
+ mempool_free(midEntry, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+ spin_lock(&GlobalMid_Lock);
+ kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+ spin_unlock(&GlobalMid_Lock);
+}
+
+void DeleteMidQEntry(struct mid_q_entry *midEntry)
+{
cifs_mid_q_entry_release(midEntry);
}
@@ -173,8 +172,10 @@ void
cifs_delete_mid(struct mid_q_entry *mid)
{
spin_lock(&GlobalMid_Lock);
- list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
DeleteMidQEntry(mid);
@@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
rc = -EHOSTDOWN;
break;
default:
- list_del_init(&mid->qhead);
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
diff --git a/fs/dax.c b/fs/dax.c
index 6bf81f931de3..2cc43cd914eb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -220,10 +220,11 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
for (;;) {
entry = xas_find_conflict(xas);
+ if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
+ return entry;
if (dax_entry_order(entry) < order)
return XA_RETRY_ENTRY;
- if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
- !dax_is_locked(entry))
+ if (!dax_is_locked(entry))
return entry;
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 6419a2b3510d..3e8cebfb59b7 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o
+obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+virtiofs-y += virtio_fs.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index dadd617d826c..ed1abc9e33cf 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,10 +276,12 @@ static void flush_bg_queue(struct fuse_conn *fc)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
- bool async = req->args->end;
+ bool async;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
+
+ async = req->args->end;
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d572c900bb0f..54d638f9ba1c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -405,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
else
fuse_invalidate_entry_cache(entry);
- fuse_advise_use_readdirplus(dir);
+ if (inode)
+ fuse_advise_use_readdirplus(dir);
return newent;
out_iput:
@@ -1521,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
is_truncate = true;
}
+ /* Flush dirty data/metadata before non-truncate SETATTR */
+ if (is_wb && S_ISREG(inode->i_mode) &&
+ attr->ia_valid &
+ (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
+ ATTR_TIMES_SET)) {
+ err = write_inode_now(inode, true);
+ if (err)
+ return err;
+
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+ }
+
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0f0225686aee..db48a5cf8620 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -217,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- bool lock_inode = (file->f_flags & O_TRUNC) &&
+ bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
fc->atomic_o_trunc &&
fc->writeback_cache;
@@ -225,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (lock_inode)
+ if (is_wb_truncate) {
inode_lock(inode);
+ fuse_set_nowrite(inode);
+ }
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
- if (lock_inode)
+ if (is_wb_truncate) {
+ fuse_release_nowrite(inode);
inode_unlock(inode);
+ }
return err;
}
@@ -1997,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page,
if (!data->ff) {
err = -EIO;
- data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+ data->ff = fuse_write_file_get(fc, fi);
if (!data->ff)
goto out_unlock;
}
@@ -2042,8 +2046,6 @@ static int fuse_writepages_fill(struct page *page,
* under writeback, so we can release the page lock.
*/
if (data->wpa == NULL) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
err = -ENOMEM;
wpa = fuse_writepage_args_alloc();
if (!wpa) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 956aeaf961ae..d148188cfca4 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -479,6 +479,7 @@ struct fuse_fs_context {
bool destroy:1;
bool no_control:1;
bool no_force_umount:1;
+ bool no_mount_options:1;
unsigned int max_read;
unsigned int blksize;
const char *subtype;
@@ -713,6 +714,9 @@ struct fuse_conn {
/** Do not allow MNT_FORCE umount */
unsigned int no_force_umount:1;
+ /* Do not show mount options */
+ unsigned int no_mount_options:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e040e2a2b621..16aec32f7f3d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -558,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
+ if (fc->no_mount_options)
+ return 0;
+
seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
if (fc->default_permissions)
@@ -1180,6 +1183,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
fc->destroy = ctx->destroy;
fc->no_control = ctx->no_control;
fc->no_force_umount = ctx->no_force_umount;
+ fc->no_mount_options = ctx->no_mount_options;
err = -ENOMEM;
root = fuse_get_root_inode(sb, ctx->rootmode);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 6af3f131e468..a5c86048b96e 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -30,6 +30,7 @@ struct virtio_fs_vq {
struct virtqueue *vq; /* protected by ->lock */
struct work_struct done_work;
struct list_head queued_reqs;
+ struct list_head end_reqs; /* End these requests */
struct delayed_work dispatch_work;
struct fuse_dev *fud;
bool connected;
@@ -54,6 +55,9 @@ struct virtio_fs_forget {
struct list_head list;
};
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+ struct fuse_req *req, bool in_flight);
+
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@ -66,6 +70,19 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
return &vq_to_fsvq(vq)->fud->pq;
}
+/* Should be called with fsvq->lock held. */
+static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ fsvq->in_flight++;
+}
+
+/* Should be called with fsvq->lock held. */
+static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ WARN_ON(fsvq->in_flight <= 0);
+ fsvq->in_flight--;
+}
+
static void release_virtio_fs_obj(struct kref *ref)
{
struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
@@ -109,22 +126,6 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
flush_delayed_work(&fsvq->dispatch_work);
}
-static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
-{
- struct virtio_fs_forget *forget;
-
- spin_lock(&fsvq->lock);
- while (1) {
- forget = list_first_entry_or_null(&fsvq->queued_reqs,
- struct virtio_fs_forget, list);
- if (!forget)
- break;
- list_del(&forget->list);
- kfree(forget);
- }
- spin_unlock(&fsvq->lock);
-}
-
static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
struct virtio_fs_vq *fsvq;
@@ -132,9 +133,6 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
for (i = 0; i < fs->nvqs; i++) {
fsvq = &fs->vqs[i];
- if (i == VQ_HIPRIO)
- drain_hiprio_queued_reqs(fsvq);
-
virtio_fs_drain_queue(fsvq);
}
}
@@ -253,14 +251,66 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work)
while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
kfree(req);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
}
} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
spin_unlock(&fsvq->lock);
}
-static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
+ struct fuse_req *req;
+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+ dispatch_work.work);
+ struct fuse_conn *fc = fsvq->fud->fc;
+ int ret;
+
+ pr_debug("virtio-fs: worker %s called.\n", __func__);
+ while (1) {
+ spin_lock(&fsvq->lock);
+ req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
+ list);
+ if (!req) {
+ spin_unlock(&fsvq->lock);
+ break;
+ }
+
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
+ fuse_request_end(fc, req);
+ }
+
+ /* Dispatch pending requests */
+ while (1) {
+ spin_lock(&fsvq->lock);
+ req = list_first_entry_or_null(&fsvq->queued_reqs,
+ struct fuse_req, list);
+ if (!req) {
+ spin_unlock(&fsvq->lock);
+ return;
+ }
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
+
+ ret = virtio_fs_enqueue_req(fsvq, req, true);
+ if (ret < 0) {
+ if (ret == -ENOMEM || ret == -ENOSPC) {
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
+ }
+ req->out.h.error = ret;
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+ pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
+ ret);
+ fuse_request_end(fc, req);
+ }
+ }
}
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
@@ -286,6 +336,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
list_del(&forget->list);
if (!fsvq->connected) {
+ dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
kfree(forget);
continue;
@@ -307,13 +358,13 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
} else {
pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
ret);
+ dec_in_flight_req(fsvq);
kfree(forget);
}
spin_unlock(&fsvq->lock);
return;
}
- fsvq->in_flight++;
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -452,7 +503,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
fuse_request_end(fc, req);
spin_lock(&fsvq->lock);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
}
}
@@ -502,6 +553,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
virtio_fs_hiprio_dispatch_work);
spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
@@ -511,8 +563,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
spin_lock_init(&fs->vqs[i].lock);
INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
- virtio_fs_dummy_dispatch_work);
+ virtio_fs_request_dispatch_work);
INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
"requests.%u", i - VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
@@ -708,6 +761,7 @@ __releases(fiq->lock)
list_add_tail(&forget->list, &fsvq->queued_reqs);
schedule_delayed_work(&fsvq->dispatch_work,
msecs_to_jiffies(1));
+ inc_in_flight_req(fsvq);
} else {
pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
ret);
@@ -717,7 +771,7 @@ __releases(fiq->lock)
goto out;
}
- fsvq->in_flight++;
+ inc_in_flight_req(fsvq);
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -819,7 +873,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req)
+ struct fuse_req *req, bool in_flight)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@ -835,6 +889,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
unsigned int i;
int ret;
bool notify;
+ struct fuse_pqueue *fpq;
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
@@ -889,7 +944,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
goto out;
}
- fsvq->in_flight++;
+ /* Request successfully sent. */
+ fpq = &fsvq->fud->pq;
+ spin_lock(&fpq->lock);
+ list_add_tail(&req->list, fpq->processing);
+ spin_unlock(&fpq->lock);
+ set_bit(FR_SENT, &req->flags);
+ /* matches barrier in request_wait_answer() */
+ smp_mb__after_atomic();
+
+ if (!in_flight)
+ inc_in_flight_req(fsvq);
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -915,9 +980,8 @@ __releases(fiq->lock)
{
unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
struct virtio_fs *fs;
- struct fuse_conn *fc;
struct fuse_req *req;
- struct fuse_pqueue *fpq;
+ struct virtio_fs_vq *fsvq;
int ret;
WARN_ON(list_empty(&fiq->pending));
@@ -928,44 +992,36 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock);
fs = fiq->priv;
- fc = fs->vqs[queue_id].fud->fc;
pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
__func__, req->in.h.opcode, req->in.h.unique,
req->in.h.nodeid, req->in.h.len,
fuse_len_args(req->args->out_numargs, req->args->out_args));
- fpq = &fs->vqs[queue_id].fud->pq;
- spin_lock(&fpq->lock);
- if (!fpq->connected) {
- spin_unlock(&fpq->lock);
- req->out.h.error = -ENODEV;
- pr_err("virtio-fs: %s disconnected\n", __func__);
- fuse_request_end(fc, req);
- return;
- }
- list_add_tail(&req->list, fpq->processing);
- spin_unlock(&fpq->lock);
- set_bit(FR_SENT, &req->flags);
- /* matches barrier in request_wait_answer() */
- smp_mb__after_atomic();
-
-retry:
- ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+ fsvq = &fs->vqs[queue_id];
+ ret = virtio_fs_enqueue_req(fsvq, req, false);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
- /* Virtqueue full. Retry submission */
- /* TODO use completion instead of timeout */
- usleep_range(20, 30);
- goto retry;
+ /*
+ * Virtqueue full. Retry submission from worker
+ * context as we might be holding fc->bg_lock.
+ */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
+ inc_in_flight_req(fsvq);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
}
req->out.h.error = ret;
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
- spin_lock(&fpq->lock);
- clear_bit(FR_SENT, &req->flags);
- list_del_init(&req->list);
- spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+
+ /* Can't end request in submission context. Use a worker */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->end_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work, 0);
+ spin_unlock(&fsvq->lock);
return;
}
}
@@ -992,6 +1048,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
.destroy = true,
.no_control = true,
.no_force_umount = true,
+ .no_mount_options = true,
};
mutex_lock(&virtio_fs_mutex);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 681b44682b0d..18daf494abab 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1540,17 +1540,23 @@ static int gfs2_init_fs_context(struct fs_context *fc)
{
struct gfs2_args *args;
- args = kzalloc(sizeof(*args), GFP_KERNEL);
+ args = kmalloc(sizeof(*args), GFP_KERNEL);
if (args == NULL)
return -ENOMEM;
- args->ar_quota = GFS2_QUOTA_DEFAULT;
- args->ar_data = GFS2_DATA_DEFAULT;
- args->ar_commit = 30;
- args->ar_statfs_quantum = 30;
- args->ar_quota_quantum = 60;
- args->ar_errors = GFS2_ERRORS_DEFAULT;
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info;
+ *args = sdp->sd_args;
+ } else {
+ memset(args, 0, sizeof(*args));
+ args->ar_quota = GFS2_QUOTA_DEFAULT;
+ args->ar_data = GFS2_DATA_DEFAULT;
+ args->ar_commit = 30;
+ args->ar_statfs_quantum = 30;
+ args->ar_quota_quantum = 60;
+ args->ar_errors = GFS2_ERRORS_DEFAULT;
+ }
fc->fs_private = args;
fc->ops = &gfs2_context_ops;
return 0;
@@ -1600,6 +1606,7 @@ static int gfs2_meta_get_tree(struct fs_context *fc)
}
static const struct fs_context_operations gfs2_meta_context_ops = {
+ .free = gfs2_fc_free,
.get_tree = gfs2_meta_get_tree,
};
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 67dbe0201e0d..f9a38998f2fc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -197,6 +197,7 @@ struct io_ring_ctx {
unsigned sq_entries;
unsigned sq_mask;
unsigned sq_thread_idle;
+ unsigned cached_sq_dropped;
struct io_uring_sqe *sq_sqes;
struct list_head defer_list;
@@ -212,6 +213,7 @@ struct io_ring_ctx {
struct {
unsigned cached_cq_tail;
+ atomic_t cached_cq_overflow;
unsigned cq_entries;
unsigned cq_mask;
struct wait_queue_head cq_wait;
@@ -420,7 +422,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+ return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+ + atomic_read(&ctx->cached_cq_overflow);
}
static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
@@ -567,9 +570,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, 0);
} else {
- unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
- WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+ WRITE_ONCE(ctx->rings->cq_overflow,
+ atomic_inc_return(&ctx->cached_cq_overflow));
}
}
@@ -735,6 +737,14 @@ static unsigned io_cqring_events(struct io_rings *rings)
return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
}
+static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+{
+ struct io_rings *rings = ctx->rings;
+
+ /* make sure SQ entry isn't read before tail */
+ return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -864,19 +874,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
- int iters, ret = 0;
+ int iters = 0, ret = 0;
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
-
- iters = 0;
do {
int tmin = 0;
@@ -912,6 +914,21 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
+ return ret;
+}
+
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
+{
+ int ret;
+
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
+ ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -1107,6 +1124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
+ req->result = 0;
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
@@ -1877,7 +1895,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
struct io_ring_ctx *ctx;
- struct io_kiocb *req;
+ struct io_kiocb *req, *prev;
unsigned long flags;
req = container_of(timer, struct io_kiocb, timeout.timer);
@@ -1885,6 +1903,15 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
atomic_inc(&ctx->cq_timeouts);
spin_lock_irqsave(&ctx->completion_lock, flags);
+ /*
+ * Adjust the reqs sequence before the current one because it
+ * will consume a slot in the cq_ring and the the cq_tail pointer
+ * will be increased, otherwise other timeout reqs may return in
+ * advance without waiting for enough wait_nr.
+ */
+ prev = req;
+ list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list)
+ prev->sequence++;
list_del(&req->list);
io_cqring_fill_event(ctx, req->user_data, -ETIME);
@@ -1903,6 +1930,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_ring_ctx *ctx = req->ctx;
struct list_head *entry;
struct timespec64 ts;
+ unsigned span = 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -1951,9 +1979,17 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (ctx->cached_sq_head < nxt_sq_head)
tmp += UINT_MAX;
- if (tmp >= tmp_nxt)
+ if (tmp > tmp_nxt)
break;
+
+ /*
+ * Sequence of reqs after the insert one and itself should
+ * be adjusted because each timeout req consumes a slot.
+ */
+ span++;
+ nxt->sequence++;
}
+ req->sequence -= span;
list_add(&req->list, entry);
spin_unlock_irq(&ctx->completion_lock);
@@ -2292,11 +2328,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
}
static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, bool force_nonblock)
+ struct sqe_submit *s)
{
int ret;
- ret = __io_submit_sqe(ctx, req, s, force_nonblock);
+ ret = __io_submit_sqe(ctx, req, s, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -2343,7 +2379,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, bool force_nonblock)
+ struct sqe_submit *s)
{
int ret;
@@ -2356,18 +2392,17 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return 0;
}
- return __io_queue_sqe(ctx, req, s, force_nonblock);
+ return __io_queue_sqe(ctx, req, s);
}
static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, struct io_kiocb *shadow,
- bool force_nonblock)
+ struct sqe_submit *s, struct io_kiocb *shadow)
{
int ret;
int need_submit = false;
if (!shadow)
- return io_queue_sqe(ctx, req, s, force_nonblock);
+ return io_queue_sqe(ctx, req, s);
/*
* Mark the first IO in link list as DRAIN, let all the following
@@ -2379,6 +2414,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (ret) {
if (ret != -EIOCBQUEUED) {
io_free_req(req);
+ __io_free_req(shadow);
io_cqring_add_event(ctx, s->sqe->user_data, ret);
return 0;
}
@@ -2396,7 +2432,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
spin_unlock_irq(&ctx->completion_lock);
if (need_submit)
- return __io_queue_sqe(ctx, req, s, force_nonblock);
+ return __io_queue_sqe(ctx, req, s);
return 0;
}
@@ -2404,8 +2440,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
- struct io_submit_state *state, struct io_kiocb **link,
- bool force_nonblock)
+ struct io_submit_state *state, struct io_kiocb **link)
{
struct io_uring_sqe *sqe_copy;
struct io_kiocb *req;
@@ -2432,6 +2467,8 @@ err:
return;
}
+ req->user_data = s->sqe->user_data;
+
/*
* If we already have a head request, queue this one for async
* submittal once the head completes. If we don't have a head but
@@ -2458,7 +2495,7 @@ err:
INIT_LIST_HEAD(&req->link_list);
*link = req;
} else {
- io_queue_sqe(ctx, req, s, force_nonblock);
+ io_queue_sqe(ctx, req, s);
}
}
@@ -2538,12 +2575,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
/* drop invalid entries */
ctx->cached_sq_head++;
- rings->sq_dropped++;
+ ctx->cached_sq_dropped++;
+ WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
return false;
}
-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
- unsigned int nr, bool has_user, bool mm_fault)
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
+ bool has_user, bool mm_fault)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
@@ -2557,19 +2595,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
}
for (i = 0; i < nr; i++) {
+ struct sqe_submit s;
+
+ if (!io_get_sqring(ctx, &s))
+ break;
+
/*
* If previous wasn't linked and we have a linked command,
* that's the end of the chain. Submit the previous link.
*/
if (!prev_was_link && link) {
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- true);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req);
link = NULL;
shadow_req = NULL;
}
- prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+ prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
- if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
+ if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
if (unlikely(!shadow_req))
@@ -2577,24 +2619,24 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
- shadow_req->sequence = sqes[i].sequence;
+ shadow_req->sequence = s.sequence;
}
out:
if (unlikely(mm_fault)) {
- io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+ io_cqring_add_event(ctx, s.sqe->user_data,
-EFAULT);
} else {
- sqes[i].has_user = has_user;
- sqes[i].needs_lock = true;
- sqes[i].needs_fixed_file = true;
- io_submit_sqe(ctx, &sqes[i], statep, &link, true);
+ s.has_user = has_user;
+ s.needs_lock = true;
+ s.needs_fixed_file = true;
+ io_submit_sqe(ctx, &s, statep, &link);
submitted++;
}
}
if (link)
- io_queue_link_head(ctx, link, &link->submit, shadow_req, true);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req);
if (statep)
io_submit_state_end(&state);
@@ -2603,7 +2645,6 @@ out:
static int io_sq_thread(void *data)
{
- struct sqe_submit sqes[IO_IOPOLL_BATCH];
struct io_ring_ctx *ctx = data;
struct mm_struct *cur_mm = NULL;
mm_segment_t old_fs;
@@ -2618,14 +2659,27 @@ static int io_sq_thread(void *data)
timeout = inflight = 0;
while (!kthread_should_park()) {
- bool all_fixed, mm_fault = false;
- int i;
+ bool mm_fault = false;
+ unsigned int to_submit;
if (inflight) {
unsigned nr_events = 0;
if (ctx->flags & IORING_SETUP_IOPOLL) {
- io_iopoll_check(ctx, &nr_events, 0);
+ /*
+ * inflight is the count of the maximum possible
+ * entries we submitted, but it can be smaller
+ * if we dropped some of them. If we don't have
+ * poll entries available, then we know that we
+ * have nothing left to poll for. Reset the
+ * inflight count to zero in that case.
+ */
+ mutex_lock(&ctx->uring_lock);
+ if (!list_empty(&ctx->poll_list))
+ __io_iopoll_check(ctx, &nr_events, 0);
+ else
+ inflight = 0;
+ mutex_unlock(&ctx->uring_lock);
} else {
/*
* Normal IO, just pretend everything completed.
@@ -2639,7 +2693,8 @@ static int io_sq_thread(void *data)
timeout = jiffies + ctx->sq_thread_idle;
}
- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+ if (!to_submit) {
/*
* We're polling. If we're within the defined idle
* period, then let us spin without work before going
@@ -2670,7 +2725,8 @@ static int io_sq_thread(void *data)
/* make sure to read SQ tail after writing flags */
smp_mb();
- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+ if (!to_submit) {
if (kthread_should_park()) {
finish_wait(&ctx->sqo_wait, &wait);
break;
@@ -2688,19 +2744,8 @@ static int io_sq_thread(void *data)
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
}
- i = 0;
- all_fixed = true;
- do {
- if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
- all_fixed = false;
-
- i++;
- if (i == ARRAY_SIZE(sqes))
- break;
- } while (io_get_sqring(ctx, &sqes[i]));
-
/* Unless all new commands are FIXED regions, grab mm */
- if (!all_fixed && !cur_mm) {
+ if (!cur_mm) {
mm_fault = !mmget_not_zero(ctx->sqo_mm);
if (!mm_fault) {
use_mm(ctx->sqo_mm);
@@ -2708,8 +2753,9 @@ static int io_sq_thread(void *data)
}
}
- inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
- mm_fault);
+ to_submit = min(to_submit, ctx->sq_entries);
+ inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
+ mm_fault);
/* Commit SQ ring head once we've consumed all SQEs */
io_commit_sqring(ctx);
@@ -2726,8 +2772,7 @@ static int io_sq_thread(void *data)
return 0;
}
-static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
- bool block_for_last)
+static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
@@ -2741,7 +2786,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
}
for (i = 0; i < to_submit; i++) {
- bool force_nonblock = true;
struct sqe_submit s;
if (!io_get_sqring(ctx, &s))
@@ -2752,8 +2796,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
* that's the end of the chain. Submit the previous link.
*/
if (!prev_was_link && link) {
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- force_nonblock);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req);
link = NULL;
shadow_req = NULL;
}
@@ -2775,27 +2818,16 @@ out:
s.needs_lock = false;
s.needs_fixed_file = false;
submit++;
-
- /*
- * The caller will block for events after submit, submit the
- * last IO non-blocking. This is either the only IO it's
- * submitting, or it already submitted the previous ones. This
- * improves performance by avoiding an async punt that we don't
- * need to do.
- */
- if (block_for_last && submit == to_submit)
- force_nonblock = false;
-
- io_submit_sqe(ctx, &s, statep, &link, force_nonblock);
+ io_submit_sqe(ctx, &s, statep, &link);
}
- io_commit_sqring(ctx);
if (link)
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- !block_for_last);
+ io_queue_link_head(ctx, link, &link->submit, shadow_req);
if (statep)
io_submit_state_end(statep);
+ io_commit_sqring(ctx);
+
return submit;
}
@@ -3636,21 +3668,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
wake_up(&ctx->sqo_wait);
submitted = to_submit;
} else if (to_submit) {
- bool block_for_last = false;
-
to_submit = min(to_submit, ctx->sq_entries);
- /*
- * Allow last submission to block in a series, IFF the caller
- * asked to wait for events and we don't currently have
- * enough. This potentially avoids an async punt.
- */
- if (to_submit == min_complete &&
- io_cqring_events(ctx->rings) < min_complete)
- block_for_last = true;
-
mutex_lock(&ctx->uring_lock);
- submitted = io_ring_submit(ctx, to_submit, block_for_last);
+ submitted = io_ring_submit(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
}
if (flags & IORING_ENTER_GETEVENTS) {
@@ -3809,10 +3830,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
if (ret)
goto err;
- ret = io_uring_get_fd(ctx);
- if (ret < 0)
- goto err;
-
memset(&p->sq_off, 0, sizeof(p->sq_off));
p->sq_off.head = offsetof(struct io_rings, sq.head);
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
@@ -3830,6 +3847,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
+ /*
+ * Install ring fd as the very last thing, so we don't risk someone
+ * having closed it before we finish setup
+ */
+ ret = io_uring_get_fd(ctx);
+ if (ret < 0)
+ goto err;
+
p->features = IORING_FEAT_SINGLE_MMAP;
return ret;
err:
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7d46fafdbbe5..0afb6d59bad0 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -464,7 +464,8 @@ nlm_bind_host(struct nlm_host *host)
.version = host->h_version,
.authflavor = RPC_AUTH_UNIX,
.flags = (RPC_CLNT_CREATE_NOPING |
- RPC_CLNT_CREATE_AUTOBIND),
+ RPC_CLNT_CREATE_AUTOBIND |
+ RPC_CLNT_CREATE_REUSEPORT),
.cred = host->h_cred,
};
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 8f34daf85f70..549350259840 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -72,8 +72,8 @@ struct cb_getattrres {
uint32_t bitmap[2];
uint64_t size;
uint64_t change_attr;
- struct timespec ctime;
- struct timespec mtime;
+ struct timespec64 ctime;
+ struct timespec64 mtime;
};
struct cb_recallargs {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f39924ba050b..cd4c6bc81cae 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -26,7 +26,6 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
struct cb_getattrargs *args = argp;
struct cb_getattrres *res = resp;
struct nfs_delegation *delegation;
- struct nfs_inode *nfsi;
struct inode *inode;
res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
@@ -47,17 +46,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
-ntohl(res->status));
goto out;
}
- nfsi = NFS_I(inode);
rcu_read_lock();
- delegation = rcu_dereference(nfsi->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
- res->ctime = timespec64_to_timespec(inode->i_ctime);
- res->mtime = timespec64_to_timespec(inode->i_mtime);
+ res->ctime = inode->i_ctime;
+ res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 73a5a5ea2976..03a20f5716c7 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -627,7 +627,7 @@ static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, u
return 0;
}
-static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *time)
{
__be32 *p;
@@ -639,14 +639,14 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *ti
return 0;
}
-static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
return 0;
return encode_attr_time(xdr,time);
}
-static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
return 0;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 30838304a0bf..02110a30a49e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -312,6 +312,12 @@ again:
/* Match nfsv4 minorversion */
if (clp->cl_minorversion != data->minorversion)
continue;
+
+ /* Match request for a dedicated DS */
+ if (test_bit(NFS_CS_DS, &data->init_flags) !=
+ test_bit(NFS_CS_DS, &clp->cl_flags))
+ continue;
+
/* Match the full socket address */
if (!rpc_cmp_addr_port(sap, clap))
/* Match all xprt_switch full socket addresses */
@@ -515,6 +521,10 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
+ if (test_bit(NFS_CS_NOPING, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NOPING;
+ if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_REUSEPORT;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -662,6 +672,7 @@ static int nfs_init_server(struct nfs_server *server,
.timeparms = &timeparms,
.cred = server->cred,
.nconnect = data->nfs_server.nconnect,
+ .init_flags = (1UL << NFS_CS_REUSEPORT),
};
struct nfs_client *clp;
int error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 071b90a45933..fe57b2b5314a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -53,6 +53,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
return false;
}
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (nfs4_is_valid_delegation(delegation, 0))
+ return delegation;
+ return NULL;
+}
+
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
@@ -189,7 +199,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
- if (delegation->inode != NULL) {
+ if (nfs4_is_valid_delegation(delegation, 0)) {
nfs4_stateid_copy(&delegation->stateid, stateid);
delegation->type = type;
delegation->pagemod_limit = pagemod_limit;
@@ -219,7 +229,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
delegation->cred,
&delegation->stateid,
issync);
- nfs_free_delegation(delegation);
return res;
}
@@ -288,7 +297,10 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
return NULL;
spin_lock(&delegation->lock);
- set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ if (!delegation->inode) {
+ spin_unlock(&delegation->lock);
+ return NULL;
+ }
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -315,10 +327,12 @@ nfs_inode_detach_delegation(struct inode *inode)
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_delegation *delegation;
- delegation = nfs_start_delegation_return(nfsi);
- if (delegation == NULL)
- return NULL;
- return nfs_detach_delegation(nfsi, delegation, server);
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
+ if (delegation != NULL)
+ delegation = nfs_detach_delegation(nfsi, delegation, server);
+ rcu_read_unlock();
+ return delegation;
}
static void
@@ -329,6 +343,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
delegation->stateid.seqid = update->stateid.seqid;
smp_wmb();
delegation->type = update->type;
+ clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
}
}
@@ -369,14 +384,18 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
- if (old_delegation != NULL) {
- /* Is this an update of the existing delegation? */
- if (nfs4_stateid_match_other(&old_delegation->stateid,
- &delegation->stateid)) {
- nfs_update_inplace_delegation(old_delegation,
- delegation);
- goto out;
- }
+ if (old_delegation == NULL)
+ goto add_new;
+ /* Is this an update of the existing delegation? */
+ if (nfs4_stateid_match_other(&old_delegation->stateid,
+ &delegation->stateid)) {
+ spin_lock(&old_delegation->lock);
+ nfs_update_inplace_delegation(old_delegation,
+ delegation);
+ spin_unlock(&old_delegation->lock);
+ goto out;
+ }
+ if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) {
/*
* Deal with broken servers that hand out two
* delegations for the same file.
@@ -395,11 +414,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (test_and_set_bit(NFS_DELEGATION_RETURNING,
&old_delegation->flags))
goto out;
- freeme = nfs_detach_delegation_locked(nfsi,
- old_delegation, clp);
- if (freeme == NULL)
- goto out;
}
+ freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp);
+ if (freeme == NULL)
+ goto out;
+add_new:
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
@@ -414,8 +433,10 @@ out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
nfs_free_delegation(delegation);
- if (freeme != NULL)
+ if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
+ nfs_free_delegation(freeme);
+ }
return status;
}
@@ -425,7 +446,6 @@ out:
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- struct nfs_inode *nfsi = NFS_I(inode);
int err = 0;
if (delegation == NULL)
@@ -447,8 +467,6 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
nfs_abort_delegation_return(delegation, clp);
goto out;
}
- if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
- goto out;
err = nfs_do_return_delegation(inode, delegation, issync);
out:
@@ -459,8 +477,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
- if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- goto out;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -472,7 +488,10 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
ret = true;
spin_unlock(&delegation->lock);
}
-out:
+ if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ ret = false;
+
return ret;
}
@@ -575,19 +594,23 @@ restart:
}
/**
- * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * nfs_inode_evict_delegation - return delegation, don't reclaim opens
* @inode: inode to process
*
* Does not protect against delegation reclaims, therefore really only safe
- * to be called from nfs4_clear_inode().
+ * to be called from nfs4_clear_inode(). Guaranteed to always free
+ * the delegation structure.
*/
-void nfs_inode_return_delegation_noreclaim(struct inode *inode)
+void nfs_inode_evict_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
delegation = nfs_inode_detach_delegation(inode);
- if (delegation != NULL)
+ if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
+ nfs_free_delegation(delegation);
+ }
}
/**
@@ -623,10 +646,18 @@ int nfs4_inode_return_delegation(struct inode *inode)
*/
int nfs4_inode_make_writeable(struct inode *inode)
{
- if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) ||
- !nfs4_check_delegation(inode, FMODE_WRITE))
- return nfs4_inode_return_delegation(inode);
- return 0;
+ struct nfs_delegation *delegation;
+
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (delegation == NULL ||
+ (nfs4_has_session(NFS_SERVER(inode)->nfs_client) &&
+ (delegation->type & FMODE_WRITE))) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+ return nfs4_inode_return_delegation(inode);
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
@@ -734,10 +765,9 @@ static void nfs_mark_delegation_revoked(struct nfs_server *server,
{
set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
- nfs_mark_return_delegation(server, delegation);
}
-static bool nfs_revoke_delegation(struct inode *inode,
+static void nfs_revoke_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
struct nfs_delegation *delegation;
@@ -751,29 +781,69 @@ static bool nfs_revoke_delegation(struct inode *inode,
if (stateid == NULL) {
nfs4_stateid_copy(&tmp, &delegation->stateid);
stateid = &tmp;
- } else if (!nfs4_stateid_match(stateid, &delegation->stateid))
- goto out;
+ } else {
+ if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
+ goto out;
+ spin_lock(&delegation->lock);
+ if (stateid->seqid) {
+ if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) {
+ spin_unlock(&delegation->lock);
+ goto out;
+ }
+ delegation->stateid.seqid = stateid->seqid;
+ }
+ spin_unlock(&delegation->lock);
+ }
nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
ret = true;
out:
rcu_read_unlock();
if (ret)
nfs_inode_find_state_and_recover(inode, stateid);
- return ret;
}
void nfs_remove_bad_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
+ nfs_revoke_delegation(inode, stateid);
+}
+EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+
+void nfs_delegation_mark_returned(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
struct nfs_delegation *delegation;
- if (!nfs_revoke_delegation(inode, stateid))
+ if (!inode)
return;
- delegation = nfs_inode_detach_delegation(inode);
- if (delegation)
- nfs_free_delegation(delegation);
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (!delegation)
+ goto out_rcu_unlock;
+
+ spin_lock(&delegation->lock);
+ if (!nfs4_stateid_match_other(stateid, &delegation->stateid))
+ goto out_spin_unlock;
+ if (stateid->seqid) {
+ /* If delegation->stateid is newer, dont mark as returned */
+ if (nfs4_stateid_is_newer(&delegation->stateid, stateid))
+ goto out_clear_returning;
+ if (delegation->stateid.seqid != stateid->seqid)
+ delegation->stateid.seqid = stateid->seqid;
+ }
+
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+
+out_clear_returning:
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+out_spin_unlock:
+ spin_unlock(&delegation->lock);
+out_rcu_unlock:
+ rcu_read_unlock();
+
+ nfs_inode_find_state_and_recover(inode, stateid);
}
-EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
/**
* nfs_expire_unused_delegation_types
@@ -830,7 +900,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
struct nfs_delegation *delegation;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(inode)->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL)
goto out_enoent;
if (stateid != NULL &&
@@ -856,6 +926,7 @@ nfs_delegation_find_inode_server(struct nfs_server *server,
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
freeme = igrab(delegation->inode);
if (freeme && nfs_sb_active(freeme->i_sb))
@@ -1130,7 +1201,8 @@ void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation &&
- nfs4_stateid_match_other(&delegation->stateid, stateid)) {
+ nfs4_stateid_match_or_older(&delegation->stateid, stateid) &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation);
found = true;
}
@@ -1179,9 +1251,11 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL &&
- nfs4_stateid_match_other(dst, &delegation->stateid)) {
+ nfs4_stateid_match_other(dst, &delegation->stateid) &&
+ nfs4_stateid_is_newer(&delegation->stateid, dst) &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
dst->seqid = delegation->stateid.seqid;
- return ret;
+ ret = true;
}
rcu_read_unlock();
out:
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9eb87ae4c982..15d3484be028 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -43,7 +43,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
int nfs4_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
-void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+void nfs_inode_evict_delegation(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
@@ -53,6 +53,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid);
+void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -68,6 +69,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
int nfs4_check_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index deecb67638aa..3430d6891e89 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -105,6 +105,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL);
if (ret) {
dprintk("%s: getattr failed %d\n", __func__, ret);
+ trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret);
dentry = ERR_PTR(ret);
goto out_free_label;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 95dc90570786..8eb731d9be3e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -649,7 +649,7 @@ out:
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
- return -EBUSY;
+ return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2a03bfeec10a..b0b4b9f303fd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -504,15 +504,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -698,7 +698,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -709,14 +709,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (attr->ia_valid & ATTR_ATIME_SET)
inode->i_atime = attr->ia_atime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -725,14 +725,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
else if (attr->ia_valid & ATTR_MTIME_SET)
inode->i_mtime = attr->ia_mtime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -1351,7 +1351,7 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct timespec ts;
+ struct timespec64 ts;
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -1361,18 +1361,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
/* If we have atomic WCC data, we may update some attributes */
- ts = timespec64_to_timespec(inode->i_ctime);
+ ts = inode->i_ctime;
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
- && timespec_equal(&ts, &fattr->pre_ctime)) {
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ && timespec64_equal(&ts, &fattr->pre_ctime)) {
+ inode->i_ctime = fattr->ctime;
}
- ts = timespec64_to_timespec(inode->i_mtime);
+ ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
- && timespec_equal(&ts, &fattr->pre_mtime)) {
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ && timespec64_equal(&ts, &fattr->pre_mtime)) {
+ inode->i_mtime = fattr->mtime;
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
@@ -1398,7 +1398,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_size, new_isize;
unsigned long invalid = 0;
- struct timespec ts;
+ struct timespec64 ts;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
return 0;
@@ -1425,12 +1425,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_CHANGE
| NFS_INO_REVAL_PAGECACHE;
- ts = timespec64_to_timespec(inode->i_mtime);
- if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime))
+ ts = inode->i_mtime;
+ if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
invalid |= NFS_INO_INVALID_MTIME;
- ts = timespec64_to_timespec(inode->i_ctime);
- if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime))
+ ts = inode->i_ctime;
+ if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
invalid |= NFS_INO_INVALID_CTIME;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1460,8 +1460,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_OTHER;
- ts = timespec64_to_timespec(inode->i_atime);
- if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime))
+ ts = inode->i_atime;
+ if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
@@ -1733,12 +1733,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
- fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime);
+ fattr->pre_ctime = inode->i_ctime;
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
- fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime);
+ fattr->pre_mtime = inode->i_mtime;
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1899,7 +1899,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
- inode->i_mtime = timespec_to_timespec64(fattr->mtime);
+ inode->i_mtime = fattr->mtime;
} else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_MTIME
@@ -1908,7 +1908,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
- inode->i_ctime = timespec_to_timespec64(fattr->ctime);
+ inode->i_ctime = fattr->ctime;
} else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_CTIME
@@ -1946,7 +1946,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = timespec_to_timespec64(fattr->atime);
+ inode->i_atime = fattr->atime;
else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 447a3c17fa8e..24a65da58aa9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -713,7 +713,7 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
* 1024*1024*1024.
*/
static inline
-u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
{
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 9287eb666322..5e0e9d29f5c5 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -157,6 +157,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out;
+ if (nfs_mountpoint_expiry_timeout < 0)
+ goto out;
+
mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cbc17a203248..d94c7abdf25a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -209,9 +209,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
* unsigned int useconds;
* };
*/
-static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
+static __be32 *xdr_encode_time(__be32 *p, const struct timespec64 *timep)
{
- *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32((u32)timep->tv_sec);
if (timep->tv_nsec != 0)
*p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
else
@@ -227,14 +227,14 @@ static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
* Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
*/
static __be32 *xdr_encode_current_server_time(__be32 *p,
- const struct timespec *timep)
+ const struct timespec64 *timep)
{
*p++ = cpu_to_be32(timep->tv_sec);
*p++ = cpu_to_be32(1000000);
return p;
}
-static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
+static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
@@ -339,7 +339,6 @@ static __be32 *xdr_time_not_set(__be32 *p)
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
- struct timespec ts;
__be32 *p;
p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
@@ -362,19 +361,15 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_ATIME_SET) {
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_time(p, &ts);
+ p = xdr_encode_time(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_current_server_time(p, &ts);
+ p = xdr_encode_current_server_time(p, &attr->ia_atime);
} else
p = xdr_time_not_set(p);
if (attr->ia_valid & ATTR_MTIME_SET) {
- ts = timespec64_to_timespec(attr->ia_atime);
- xdr_encode_time(p, &ts);
+ xdr_encode_time(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
- ts = timespec64_to_timespec(attr->ia_mtime);
- xdr_encode_current_server_time(p, &ts);
+ xdr_encode_current_server_time(p, &attr->ia_mtime);
} else
xdr_time_not_set(p);
}
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 148ceb74d27c..223904bc40a7 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -106,7 +106,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
cl_init.nconnect = mds_clp->cl_nconnect;
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
- set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
+ __set_bit(NFS_CS_NOPING, &cl_init.init_flags);
+ __set_bit(NFS_CS_DS, &cl_init.init_flags);
/* Use the MDS nfs_client cl_ipaddr. */
nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 602767850b36..927eb680f161 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -456,14 +456,14 @@ static void zero_nfs_fh3(struct nfs_fh *fh)
* uint32 nseconds;
* };
*/
-static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
+static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep)
{
- *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32((u32)timep->tv_sec);
*p++ = cpu_to_be32(timep->tv_nsec);
return p;
}
-static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
+static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep)
{
timep->tv_sec = be32_to_cpup(p++);
timep->tv_nsec = be32_to_cpup(p++);
@@ -533,7 +533,6 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
struct user_namespace *userns)
{
- struct timespec ts;
u32 nbytes;
__be32 *p;
@@ -583,10 +582,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_ATIME_SET) {
- struct timespec ts;
*p++ = xdr_two;
- ts = timespec64_to_timespec(attr->ia_atime);
- p = xdr_encode_nfstime3(p, &ts);
+ p = xdr_encode_nfstime3(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
*p++ = xdr_one;
} else
@@ -594,8 +591,7 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
if (attr->ia_valid & ATTR_MTIME_SET) {
*p++ = xdr_two;
- ts = timespec64_to_timespec(attr->ia_mtime);
- xdr_encode_nfstime3(p, &ts);
+ xdr_encode_nfstime3(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
*p = xdr_one;
} else
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 901cca7542f9..c891af949886 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,8 +13,10 @@
#define PNFS_LAYOUTSTATS_MAXDEV (4)
/* nfs4.2proc.c */
+#ifdef CONFIG_NFS_V4_2
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
-ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t,
+ struct nl4_server *, nfs4_stateid *, bool);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
@@ -23,5 +25,16 @@ int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
const struct nfs42_layout_error *errors,
size_t n);
+int nfs42_proc_copy_notify(struct file *, struct file *,
+ struct nfs42_copy_notify_res *);
+static inline bool nfs42_files_from_same_server(struct file *in,
+ struct file *out)
+{
+ struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client;
+ struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client;
+ return nfs4_check_serverowner_major_id(c_in->cl_serverowner,
+ c_out->cl_serverowner);
+}
+#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 5196bfa7894d..1fe83e0f663e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -3,6 +3,7 @@
* Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
*/
#include <linux/fs.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
@@ -15,10 +16,30 @@
#include "pnfs.h"
#include "nfs4session.h"
#include "internal.h"
+#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
+{
+ struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client;
+ unsigned short port = 2049;
+
+ rcu_read_lock();
+ naddr->netid_len = scnprintf(naddr->netid,
+ sizeof(naddr->netid), "%s",
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_NETID));
+ naddr->addr_len = scnprintf(naddr->addr,
+ sizeof(naddr->addr),
+ "%s.%u.%u",
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ port >> 8, port & 255);
+ rcu_read_unlock();
+}
+
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
struct nfs_lock_context *lock, loff_t offset, loff_t len)
{
@@ -28,7 +49,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
- .falloc_bitmask = server->cache_consistency_bitmask,
+ .falloc_bitmask = nfs4_fattr_bitmap,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
@@ -132,22 +153,26 @@ out_unlock:
}
static int handle_async_copy(struct nfs42_copy_res *res,
- struct nfs_server *server,
+ struct nfs_server *dst_server,
+ struct nfs_server *src_server,
struct file *src,
struct file *dst,
- nfs4_stateid *src_stateid)
+ nfs4_stateid *src_stateid,
+ bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy;
int status = NFS4_OK;
bool found_pending = false;
- struct nfs_open_context *ctx = nfs_file_open_context(dst);
+ struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
+ struct nfs_open_context *src_ctx = nfs_file_open_context(src);
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
if (!copy)
return -ENOMEM;
- spin_lock(&server->nfs_client->cl_lock);
- list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
+ spin_lock(&dst_server->nfs_client->cl_lock);
+ list_for_each_entry(tmp_copy,
+ &dst_server->nfs_client->pending_cb_stateids,
copies) {
if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
NFS4_STATEID_SIZE))
@@ -157,7 +182,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
break;
}
if (found_pending) {
- spin_unlock(&server->nfs_client->cl_lock);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
kfree(copy);
copy = tmp_copy;
goto out;
@@ -165,19 +190,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
init_completion(&copy->completion);
- copy->parent_state = ctx->state;
+ copy->parent_dst_state = dst_ctx->state;
+ copy->parent_src_state = src_ctx->state;
- list_add_tail(&copy->copies, &server->ss_copies);
- spin_unlock(&server->nfs_client->cl_lock);
+ list_add_tail(&copy->copies, &dst_server->ss_copies);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_add_tail(&copy->src_copies, &src_server->ss_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
status = wait_for_completion_interruptible(&copy->completion);
- spin_lock(&server->nfs_client->cl_lock);
+ spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(&copy->copies);
- spin_unlock(&server->nfs_client->cl_lock);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_del_init(&copy->src_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
if (status == -ERESTARTSYS) {
goto out_cancel;
- } else if (copy->flags) {
+ } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
status = -EAGAIN;
+ *restart = true;
goto out_cancel;
}
out:
@@ -185,12 +223,14 @@ out:
memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
status = -copy->error;
+out_free:
kfree(copy);
return status;
out_cancel:
nfs42_do_offload_cancel_async(dst, &copy->stateid);
- kfree(copy);
- return status;
+ if (!nfs42_files_from_same_server(src, dst))
+ nfs42_do_offload_cancel_async(src, src_stateid);
+ goto out_free;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
@@ -222,7 +262,10 @@ static ssize_t _nfs42_proc_copy(struct file *src,
struct file *dst,
struct nfs_lock_context *dst_lock,
struct nfs42_copy_args *args,
- struct nfs42_copy_res *res)
+ struct nfs42_copy_res *res,
+ struct nl4_server *nss,
+ nfs4_stateid *cnr_stateid,
+ bool *restart)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
@@ -230,17 +273,23 @@ static ssize_t _nfs42_proc_copy(struct file *src,
.rpc_resp = res,
};
struct inode *dst_inode = file_inode(dst);
- struct nfs_server *server = NFS_SERVER(dst_inode);
+ struct inode *src_inode = file_inode(src);
+ struct nfs_server *dst_server = NFS_SERVER(dst_inode);
+ struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
size_t count = args->count;
ssize_t status;
- status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
- src_lock, FMODE_READ);
- if (status)
- return status;
-
+ if (nss) {
+ args->cp_src = nss;
+ nfs4_stateid_copy(&args->src_stateid, cnr_stateid);
+ } else {
+ status = nfs4_set_rw_stateid(&args->src_stateid,
+ src_lock->open_context, src_lock, FMODE_READ);
+ if (status)
+ return status;
+ }
status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
pos_src, pos_src + (loff_t)count - 1);
if (status)
@@ -262,13 +311,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (!res->commit_res.verf)
return -ENOMEM;
}
+ set_bit(NFS_CLNT_SRC_SSC_COPY_STATE,
+ &src_lock->open_context->state->flags);
set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&dst_lock->open_context->state->flags);
- status = nfs4_call_sync(server->client, server, &msg,
+ status = nfs4_call_sync(dst_server->client, dst_server, &msg,
&args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
- server->caps &= ~NFS_CAP_COPY;
+ dst_server->caps &= ~NFS_CAP_COPY;
if (status)
goto out;
@@ -280,8 +331,8 @@ static ssize_t _nfs42_proc_copy(struct file *src,
}
if (!res->synchronous) {
- status = handle_async_copy(res, server, src, dst,
- &args->src_stateid);
+ status = handle_async_copy(res, dst_server, src_server, src,
+ dst, &args->src_stateid, restart);
if (status)
return status;
}
@@ -304,8 +355,9 @@ out:
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
- struct file *dst, loff_t pos_dst,
- size_t count)
+ struct file *dst, loff_t pos_dst, size_t count,
+ struct nl4_server *nss,
+ nfs4_stateid *cnr_stateid, bool sync)
{
struct nfs_server *server = NFS_SERVER(file_inode(dst));
struct nfs_lock_context *src_lock;
@@ -316,7 +368,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
.dst_fh = NFS_FH(file_inode(dst)),
.dst_pos = pos_dst,
.count = count,
- .sync = false,
+ .sync = sync,
};
struct nfs42_copy_res res;
struct nfs4_exception src_exception = {
@@ -328,6 +380,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
.stateid = &args.dst_stateid,
};
ssize_t err, err2;
+ bool restart = false;
src_lock = nfs_get_lock_context(nfs_file_open_context(src));
if (IS_ERR(src_lock))
@@ -347,21 +400,33 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
inode_lock(file_inode(dst));
err = _nfs42_proc_copy(src, src_lock,
dst, dst_lock,
- &args, &res);
+ &args, &res,
+ nss, cnr_stateid, &restart);
inode_unlock(file_inode(dst));
if (err >= 0)
break;
- if (err == -ENOTSUPP) {
+ if (err == -ENOTSUPP &&
+ nfs42_files_from_same_server(src, dst)) {
err = -EOPNOTSUPP;
break;
} else if (err == -EAGAIN) {
- dst_exception.retry = 1;
- continue;
+ if (!restart) {
+ dst_exception.retry = 1;
+ continue;
+ }
+ break;
} else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
args.sync = true;
dst_exception.retry = 1;
continue;
+ } else if ((err == -ESTALE ||
+ err == -NFS4ERR_OFFLOAD_DENIED ||
+ err == -ENOTSUPP) &&
+ !nfs42_files_from_same_server(src, dst)) {
+ nfs42_do_offload_cancel_async(src, &args.src_stateid);
+ err = -EOPNOTSUPP;
+ break;
}
err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -459,6 +524,76 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ struct nfs42_copy_notify_args *args,
+ struct nfs42_copy_notify_res *res)
+{
+ struct nfs_server *src_server = NFS_SERVER(file_inode(src));
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY],
+ .rpc_argp = args,
+ .rpc_resp = res,
+ };
+ int status;
+ struct nfs_open_context *ctx;
+ struct nfs_lock_context *l_ctx;
+
+ ctx = get_nfs_open_context(nfs_file_open_context(src));
+ l_ctx = nfs_get_lock_context(ctx);
+ if (IS_ERR(l_ctx))
+ return PTR_ERR(l_ctx);
+
+ status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
+ FMODE_READ);
+ nfs_put_lock_context(l_ctx);
+ if (status)
+ return status;
+
+ status = nfs4_call_sync(src_server->client, src_server, &msg,
+ &args->cna_seq_args, &res->cnr_seq_res, 0);
+ if (status == -ENOTSUPP)
+ src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
+
+ put_nfs_open_context(nfs_file_open_context(src));
+ return status;
+}
+
+int nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ struct nfs42_copy_notify_res *res)
+{
+ struct nfs_server *src_server = NFS_SERVER(file_inode(src));
+ struct nfs42_copy_notify_args *args;
+ struct nfs4_exception exception = {
+ .inode = file_inode(src),
+ };
+ int status;
+
+ if (!(src_server->caps & NFS_CAP_COPY_NOTIFY))
+ return -EOPNOTSUPP;
+
+ args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS);
+ if (args == NULL)
+ return -ENOMEM;
+
+ args->cna_src_fh = NFS_FH(file_inode(src)),
+ args->cna_dst.nl4_type = NL4_NETADDR;
+ nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr);
+ exception.stateid = &args->cna_src_stateid;
+
+ do {
+ status = _nfs42_proc_copy_notify(src, dst, args, res);
+ if (status == -ENOTSUPP) {
+ status = -EOPNOTSUPP;
+ goto out;
+ }
+ status = nfs4_handle_exception(src_server, status, &exception);
+ } while (exception.retry);
+
+out:
+ kfree(args);
+ return status;
+}
+
static loff_t _nfs42_proc_llseek(struct file *filep,
struct nfs_lock_context *lock, loff_t offset, int whence)
{
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index aed865a84629..c03f3246d6c5 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -21,7 +21,10 @@
#define encode_copy_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
- 2 + 2 + 2 + 1 + 1 + 1)
+ 2 + 2 + 2 + 1 + 1 + 1 +\
+ 1 + /* One cnr_source_server */\
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define decode_copy_maxsz (op_decode_hdr_maxsz + \
NFS42_WRITE_RES_SIZE + \
1 /* cr_consecutive */ + \
@@ -29,6 +32,16 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
+#define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \
+ 3 + /* cnr_lease_time */\
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 1 + /* Support 1 cnr_source_server */\
+ 1 + /* nl4_type */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT))
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
@@ -99,6 +112,12 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_copy_notify_maxsz)
+#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -166,6 +185,26 @@ static void encode_allocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}
+static void encode_nl4_server(struct xdr_stream *xdr,
+ const struct nl4_server *ns)
+{
+ encode_uint32(xdr, ns->nl4_type);
+ switch (ns->nl4_type) {
+ case NL4_NAME:
+ case NL4_URL:
+ encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str);
+ break;
+ case NL4_NETADDR:
+ encode_string(xdr, ns->u.nl4_addr.netid_len,
+ ns->u.nl4_addr.netid);
+ encode_string(xdr, ns->u.nl4_addr.addr_len,
+ ns->u.nl4_addr.addr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
static void encode_copy(struct xdr_stream *xdr,
const struct nfs42_copy_args *args,
struct compound_hdr *hdr)
@@ -180,7 +219,12 @@ static void encode_copy(struct xdr_stream *xdr,
encode_uint32(xdr, 1); /* consecutive = true */
encode_uint32(xdr, args->sync);
- encode_uint32(xdr, 0); /* src server list */
+ if (args->cp_src == NULL) { /* intra-ssc */
+ encode_uint32(xdr, 0); /* no src server list */
+ return;
+ }
+ encode_uint32(xdr, 1); /* supporting 1 server */
+ encode_nl4_server(xdr, args->cp_src);
}
static void encode_offload_cancel(struct xdr_stream *xdr,
@@ -191,6 +235,15 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_copy_notify(struct xdr_stream *xdr,
+ const struct nfs42_copy_notify_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->cna_src_stateid);
+ encode_nl4_server(xdr, &args->cna_dst);
+}
+
static void encode_deallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -355,6 +408,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
}
/*
+ * Encode COPY_NOTIFY request
+ */
+static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_copy_notify_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->cna_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->cna_seq_args, &hdr);
+ encode_putfh(xdr, args->cna_src_fh, &hdr);
+ encode_copy_notify(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode DEALLOCATE request
*/
static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
@@ -490,6 +562,58 @@ static int decode_write_response(struct xdr_stream *xdr,
return decode_verifier(xdr, &res->verifier.verifier);
}
+static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns)
+{
+ struct nfs42_netaddr *naddr;
+ uint32_t dummy;
+ char *dummy_str;
+ __be32 *p;
+ int status;
+
+ /* nl_type */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ ns->nl4_type = be32_to_cpup(p);
+ switch (ns->nl4_type) {
+ case NL4_NAME:
+ case NL4_URL:
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(&ns->u.nl4_str, dummy_str, dummy);
+ ns->u.nl4_str_sz = dummy;
+ break;
+ case NL4_NETADDR:
+ naddr = &ns->u.nl4_addr;
+
+ /* netid string */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > RPCBIND_MAXNETIDLEN))
+ return -EIO;
+ naddr->netid_len = dummy;
+ memcpy(naddr->netid, dummy_str, naddr->netid_len);
+
+ /* uaddr string */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > RPCBIND_MAXUADDRLEN))
+ return -EIO;
+ naddr->addr_len = dummy;
+ memcpy(naddr->addr, dummy_str, naddr->addr_len);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+ return 0;
+}
+
static int decode_copy_requirements(struct xdr_stream *xdr,
struct nfs42_copy_res *res) {
__be32 *p;
@@ -529,6 +653,42 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_copy_notify(struct xdr_stream *xdr,
+ struct nfs42_copy_notify_res *res)
+{
+ __be32 *p;
+ int status, count;
+
+ status = decode_op_hdr(xdr, OP_COPY_NOTIFY);
+ if (status)
+ return status;
+ /* cnr_lease_time */
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ return -EIO;
+ p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds);
+ res->cnr_lease_time.nseconds = be32_to_cpup(p);
+
+ status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE);
+ if (unlikely(status))
+ return -EIO;
+
+ /* number of source addresses */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ count = be32_to_cpup(p);
+ if (count > 1)
+ pr_warn("NFS: %s: nsvr %d > Supported. Use first servers\n",
+ __func__, count);
+
+ status = decode_nl4_server(xdr, &res->cnr_src);
+ if (unlikely(status))
+ return -EIO;
+ return 0;
+}
+
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -657,6 +817,32 @@ out:
}
/*
+ * Decode COPY_NOTIFY response
+ */
+static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_copy_notify_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->cnr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_copy_notify(xdr, res);
+
+out:
+ return status;
+}
+
+/*
* Decode DEALLOCATE request
*/
static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 16b2e5cc3e94..a4520115d8a3 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -168,6 +168,8 @@ enum {
NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
#ifdef CONFIG_NFS_V4_2
NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/
+ NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/
+ NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */
#endif /* CONFIG_NFS_V4_2 */
};
@@ -311,6 +313,13 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
+extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label,
+ struct inode *inode);
+extern int update_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *open_stateid,
+ const nfs4_stateid *deleg_stateid,
+ fmode_t fmode);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
@@ -445,6 +454,8 @@ extern void nfs4_set_lease_period(struct nfs_client *clp,
/* nfs4state.c */
+extern const nfs4_stateid current_stateid;
+
const struct cred *nfs4_get_clid_cred(struct nfs_client *clp);
const struct cred *nfs4_get_machine_cred(struct nfs_client *clp);
const struct cred *nfs4_get_renew_cred(struct nfs_client *clp);
@@ -457,6 +468,8 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, const struct cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
extern void nfs41_notify_server(struct nfs_client *);
+bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
+ struct nfs41_server_owner *o2);
#else
static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
@@ -572,6 +585,12 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
}
+static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return nfs4_stateid_match_other(dst, src) &&
+ !(src->seqid && nfs4_stateid_is_newer(dst, src));
+}
+
static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
{
u32 seqid = be32_to_cpu(s1->seqid);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index da6204025a2d..460d6251c405 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -629,7 +629,7 @@ out:
/*
* Returns true if the server major ids match
*/
-static bool
+bool
nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2)
{
@@ -879,14 +879,17 @@ static int nfs4_set_client(struct nfs_server *server,
};
struct nfs_client *clp;
- if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
+ if (minorversion == 0)
+ __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
+ else if (proto == XPRT_TRANSPORT_TCP)
cl_init.nconnect = nconnect;
+
if (server->flags & NFS_MOUNT_NORESVPORT)
- set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
- set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+ __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
- set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
+ __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port(addr);
/* Allocate or find a client reference we can use */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 339663d04bf8..620de905cba9 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,14 +133,55 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
+ struct nfs42_copy_notify_res *cn_resp = NULL;
+ struct nl4_server *nss = NULL;
+ nfs4_stateid *cnrs = NULL;
+ ssize_t ret;
+ bool sync = false;
+
/* Only offload copy if superblock is the same */
- if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ if (file_in->f_op != &nfs4_file_operations)
return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
return -EOPNOTSUPP;
- return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+ /* if the copy size if smaller than 2 RPC payloads, make it
+ * synchronous
+ */
+ if (count <= 2 * NFS_SERVER(file_inode(file_in))->rsize)
+ sync = true;
+retry:
+ if (!nfs42_files_from_same_server(file_in, file_out)) {
+ /* for inter copy, if copy size if smaller than 12 RPC
+ * payloads, fallback to traditional copy. There are
+ * 14 RPCs during an NFSv4.x mount between source/dest
+ * servers.
+ */
+ if (sync ||
+ count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
+ return -EOPNOTSUPP;
+ cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
+ GFP_NOFS);
+ if (unlikely(cn_resp == NULL))
+ return -ENOMEM;
+
+ ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp);
+ if (ret) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ nss = &cn_resp->cnr_src;
+ cnrs = &cn_resp->cnr_stateid;
+ }
+ ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count,
+ nss, cnrs, sync);
+out:
+ if (!nfs42_files_from_same_server(file_in, file_out))
+ kfree(cn_resp);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
}
static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
@@ -263,6 +304,102 @@ out_unlock:
out:
return ret < 0 ? ret : count;
}
+
+static int read_name_gen = 1;
+#define SSC_READ_NAME_BODY "ssc_read_%d"
+
+struct file *
+nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh,
+ nfs4_stateid *stateid)
+{
+ struct nfs_fattr fattr;
+ struct file *filep, *res;
+ struct nfs_server *server;
+ struct inode *r_ino = NULL;
+ struct nfs_open_context *ctx;
+ struct nfs4_state_owner *sp;
+ char *read_name = NULL;
+ int len, status = 0;
+
+ server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+
+ nfs_fattr_init(&fattr);
+
+ status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL);
+ if (status < 0) {
+ res = ERR_PTR(status);
+ goto out;
+ }
+
+ res = ERR_PTR(-ENOMEM);
+ len = strlen(SSC_READ_NAME_BODY) + 16;
+ read_name = kzalloc(len, GFP_NOFS);
+ if (read_name == NULL)
+ goto out;
+ snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
+
+ r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr,
+ NULL);
+ if (IS_ERR(r_ino)) {
+ res = ERR_CAST(r_ino);
+ goto out_free_name;
+ }
+
+ filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ,
+ r_ino->i_fop);
+ if (IS_ERR(filep)) {
+ res = ERR_CAST(filep);
+ goto out_free_name;
+ }
+ filep->f_mode |= FMODE_READ;
+
+ ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode,
+ filep);
+ if (IS_ERR(ctx)) {
+ res = ERR_CAST(ctx);
+ goto out_filep;
+ }
+
+ res = ERR_PTR(-EINVAL);
+ sp = nfs4_get_state_owner(server, ctx->cred, GFP_KERNEL);
+ if (sp == NULL)
+ goto out_ctx;
+
+ ctx->state = nfs4_get_open_state(r_ino, sp);
+ if (ctx->state == NULL)
+ goto out_stateowner;
+
+ set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags);
+ set_bit(NFS_OPEN_STATE, &ctx->state->flags);
+ memcpy(&ctx->state->open_stateid.other, &stateid->other,
+ NFS4_STATEID_OTHER_SIZE);
+ update_open_stateid(ctx->state, stateid, NULL, filep->f_mode);
+
+ nfs_file_set_open_context(filep, ctx);
+ put_nfs_open_context(ctx);
+
+ file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping);
+ res = filep;
+out_free_name:
+ kfree(read_name);
+out:
+ return res;
+out_stateowner:
+ nfs4_put_state_owner(sp);
+out_ctx:
+ put_nfs_open_context(ctx);
+out_filep:
+ fput(filep);
+ goto out_free_name;
+}
+EXPORT_SYMBOL_GPL(nfs42_ssc_open);
+void nfs42_ssc_close(struct file *filep)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(filep);
+
+ ctx->state->flags = 0;
+}
+EXPORT_SYMBOL_GPL(nfs42_ssc_close);
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ab8ca20fd579..76d37161409a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -91,7 +91,6 @@ struct nfs4_opendata;
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
-static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
@@ -476,6 +475,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_PARTNER_NO_AUTH:
if (inode != NULL && stateid != NULL) {
nfs_inode_find_state_and_recover(inode,
stateid);
@@ -521,9 +521,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
- dprintk("%s ERROR: %d Reset session\n", __func__,
- errorcode);
- nfs4_schedule_session_recovery(clp->cl_session, errorcode);
+ /* Handled in nfs41_sequence_process() */
goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
@@ -782,6 +780,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_session *session;
struct nfs4_slot *slot = res->sr_slot;
struct nfs_client *clp;
+ int status;
int ret = 1;
if (slot == NULL)
@@ -793,8 +792,13 @@ static int nfs41_sequence_process(struct rpc_task *task,
session = slot->table->session;
trace_nfs4_sequence_done(session, res);
+
+ status = res->sr_status;
+ if (task->tk_status == -NFS4ERR_DEADSESSION)
+ status = -NFS4ERR_DEADSESSION;
+
/* Check the SEQUENCE operation status */
- switch (res->sr_status) {
+ switch (status) {
case 0:
/* Mark this sequence number as having been acked */
nfs4_slot_sequence_acked(slot, slot->seq_nr);
@@ -866,6 +870,10 @@ static int nfs41_sequence_process(struct rpc_task *task,
*/
slot->seq_nr = slot->seq_nr_highest_sent;
goto out_retry;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ goto session_recover;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@@ -876,8 +884,10 @@ out:
out_noaction:
return ret;
session_recover:
- nfs4_schedule_session_recovery(session, res->sr_status);
- goto retry_nowait;
+ nfs4_schedule_session_recovery(session, status);
+ dprintk("%s ERROR: %d Reset session\n", __func__, status);
+ nfs41_sequence_free_slot(res);
+ goto out;
retry_new_seq:
++slot->seq_nr;
retry_nowait:
@@ -1440,8 +1450,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
return 0;
if ((delegation->type & fmode) != fmode)
return 0;
- if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- return 0;
switch (claim) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_FH:
@@ -1718,7 +1726,7 @@ static void nfs_state_clear_delegation(struct nfs4_state *state)
write_sequnlock(&state->seqlock);
}
-static int update_open_stateid(struct nfs4_state *state,
+int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *delegation,
fmode_t fmode)
@@ -1739,7 +1747,7 @@ static int update_open_stateid(struct nfs4_state *state,
ret = 1;
}
- deleg_cur = rcu_dereference(nfsi->delegation);
+ deleg_cur = nfs4_get_valid_delegation(state->inode);
if (deleg_cur == NULL)
goto no_delegation;
@@ -1751,7 +1759,7 @@ static int update_open_stateid(struct nfs4_state *state,
if (delegation == NULL)
delegation = &deleg_cur->stateid;
- else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
+ else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation))
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
@@ -1798,7 +1806,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
fmode &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(inode)->delegation);
+ delegation = nfs4_get_valid_delegation(inode);
if (delegation == NULL || (delegation->type & fmode) == fmode) {
rcu_read_unlock();
return;
@@ -1810,7 +1818,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
{
struct nfs4_state *state = opendata->state;
- struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_delegation *delegation;
int open_mode = opendata->o_arg.open_flags;
fmode_t fmode = opendata->o_arg.fmode;
@@ -1827,7 +1834,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
}
spin_unlock(&state->owner->so_lock);
rcu_read_lock();
- delegation = rcu_dereference(nfsi->delegation);
+ delegation = nfs4_get_valid_delegation(state->inode);
if (!can_open_delegated(delegation, fmode, claim)) {
rcu_read_unlock();
break;
@@ -2191,7 +2198,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
return -EAGAIN;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
@@ -2371,7 +2377,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->o_arg.open_flags, claim))
goto out_no_action;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+ delegation = nfs4_get_valid_delegation(data->state->inode);
if (can_open_delegated(delegation, data->o_arg.fmode, claim))
goto unlock_no_action;
rcu_read_unlock();
@@ -4065,7 +4071,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
-static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{
@@ -5101,12 +5107,12 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
const struct nfs_lock_context *l_ctx,
fmode_t fmode)
{
- nfs4_stateid current_stateid;
+ nfs4_stateid _current_stateid;
/* If the current stateid represents a lost lock, then exit */
- if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+ if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO)
return true;
- return nfs4_stateid_match(stateid, &current_stateid);
+ return nfs4_stateid_match(stateid, &_current_stateid);
}
static bool nfs4_error_stateid_expired(int err)
@@ -6199,10 +6205,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = 0;
break;
case -NFS4ERR_OLD_STATEID:
- if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
- goto out_restart;
- task->tk_status = 0;
- break;
+ if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
+ nfs4_stateid_seqid_inc(&data->stateid);
+ if (data->args.bitmask) {
+ data->args.bitmask = NULL;
+ data->res.fattr = NULL;
+ }
+ goto out_restart;
case -NFS4ERR_ACCESS:
if (data->args.bitmask) {
data->args.bitmask = NULL;
@@ -6217,6 +6226,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
if (exception.retry)
goto out_restart;
}
+ nfs_delegation_mark_returned(data->inode, data->args.stateid);
data->rpc_status = task->tk_status;
return;
out_restart:
@@ -6246,8 +6256,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data;
- if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
+ if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
+ nfs4_sequence_done(task, &d_data->res.seq_res);
return;
+ }
lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
if (lo && !pnfs_layout_is_valid(lo)) {
@@ -7823,6 +7835,15 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
static void
nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
{
+ struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp;
+ struct nfs_client *clp = args->client;
+
+ switch (task->tk_status) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ nfs4_schedule_session_recovery(clp->cl_session,
+ task->tk_status);
+ }
}
static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
@@ -8870,8 +8891,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- nfs4_schedule_session_recovery(clp->cl_session,
- task->tk_status);
break;
default:
nfs4_schedule_lease_recovery(clp);
@@ -9900,6 +9919,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_ALLOCATE
| NFS_CAP_COPY
| NFS_CAP_OFFLOAD_CANCEL
+ | NFS_CAP_COPY_NOTIFY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0c6d53dc3672..ba97b9cf632c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1407,7 +1407,7 @@ nfs_state_find_lock_state_by_stateid(struct nfs4_state *state,
list_for_each_entry(pos, &state->lock_states, ls_locks) {
if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
continue;
- if (nfs4_stateid_match_other(&pos->ls_stateid, stateid))
+ if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
return pos;
}
return NULL;
@@ -1441,12 +1441,13 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
state = ctx->state;
if (state == NULL)
continue;
- if (nfs4_stateid_match_other(&state->stateid, stateid) &&
+ if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
}
- if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+ if (test_bit(NFS_OPEN_STATE, &state->flags) &&
+ nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state)) {
found = true;
continue;
@@ -1556,16 +1557,32 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
{
struct nfs4_copy_state *copy;
- if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+ if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+ !test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
return;
spin_lock(&sp->so_server->nfs_client->cl_lock);
list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
- if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
- continue;
+ if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(&state->stateid,
+ &copy->parent_dst_state->stateid)))
+ continue;
copy->flags = 1;
- complete(&copy->completion);
- break;
+ if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+ &state->flags)) {
+ clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
+ complete(&copy->completion);
+ }
+ }
+ list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+ if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(&state->stateid,
+ &copy->parent_src_state->stateid)))
+ continue;
+ copy->flags = 1;
+ if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+ &state->flags))
+ complete(&copy->completion);
}
spin_unlock(&sp->so_server->nfs_client->cl_lock);
}
@@ -1609,6 +1626,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
struct nfs4_state *state;
unsigned int loop = 0;
int status = 0;
+#ifdef CONFIG_NFS_V4_2
+ bool found_ssc_copy_state = false;
+#endif /* CONFIG_NFS_V4_2 */
/* Note: we rely on the sp->so_states list being ordered
* so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
@@ -1628,6 +1648,13 @@ restart:
continue;
if (state->state == 0)
continue;
+#ifdef CONFIG_NFS_V4_2
+ if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
+ nfs4_state_mark_recovery_failed(state, -EIO);
+ found_ssc_copy_state = true;
+ continue;
+ }
+#endif /* CONFIG_NFS_V4_2 */
refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
status = __nfs4_reclaim_open_state(sp, state, ops);
@@ -1682,6 +1709,10 @@ restart:
}
raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
+#ifdef CONFIG_NFS_V4_2
+ if (found_ssc_copy_state)
+ return -EIO;
+#endif /* CONFIG_NFS_V4_2 */
return 0;
out_err:
nfs4_put_open_state(state);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 04c57066a11a..2c9cbade561a 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -92,8 +92,8 @@ static void nfs4_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- /* If we are holding a delegation, return it! */
- nfs_inode_return_delegation_noreclaim(inode);
+ /* If we are holding a delegation, return and free it */
+ nfs_inode_evict_delegation(inode);
/* Note that above delegreturn would trigger pnfs return-on-close */
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ab07db0f07cd..936c57779ff4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1059,7 +1059,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
}
static __be32 *
-xdr_encode_nfstime4(__be32 *p, const struct timespec *t)
+xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
{
p = xdr_encode_hyper(p, (__s64)t->tv_sec);
*p++ = cpu_to_be32(t->tv_nsec);
@@ -1072,7 +1072,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
const struct nfs_server *server,
const uint32_t attrmask[])
{
- struct timespec ts;
char owner_name[IDMAP_NAMESZ];
char owner_group[IDMAP_NAMESZ];
int owner_namelen = 0;
@@ -1161,16 +1160,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
if (iap->ia_valid & ATTR_ATIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- ts = timespec64_to_timespec(iap->ia_atime);
- p = xdr_encode_nfstime4(p, &ts);
+ p = xdr_encode_nfstime4(p, &iap->ia_atime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
if (iap->ia_valid & ATTR_MTIME_SET) {
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- ts = timespec64_to_timespec(iap->ia_mtime);
- p = xdr_encode_nfstime4(p, &ts);
+ p = xdr_encode_nfstime4(p, &iap->ia_mtime);
} else
*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
@@ -4065,17 +4062,17 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
}
static __be32 *
-xdr_decode_nfstime4(__be32 *p, struct timespec *t)
+xdr_decode_nfstime4(__be32 *p, struct timespec64 *t)
{
__u64 sec;
p = xdr_decode_hyper(p, &sec);
- t-> tv_sec = (time_t)sec;
+ t-> tv_sec = sec;
t->tv_nsec = be32_to_cpup(p++);
return p;
}
-static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
+static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time)
{
__be32 *p;
@@ -4086,7 +4083,7 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
return 0;
}
-static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4104,7 +4101,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
-static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4123,7 +4120,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
}
static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
- struct timespec *time)
+ struct timespec64 *time)
{
int status = 0;
@@ -4186,7 +4183,7 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
return status;
}
-static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -7581,6 +7578,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
};
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 361cc10d6f95..f64a33d2a1d1 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -1065,6 +1065,39 @@ TRACE_EVENT(nfs_commit_done,
)
);
+TRACE_EVENT(nfs_fh_to_dentry,
+ TP_PROTO(
+ const struct super_block *sb,
+ const struct nfs_fh *fh,
+ u64 fileid,
+ int error
+ ),
+
+ TP_ARGS(sb, fh, fileid, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->dev = sb->s_dev;
+ __entry->fileid = fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ ),
+
+ TP_printk(
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ",
+ __entry->error,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+
TRACE_DEFINE_ENUM(NFS_OK);
TRACE_DEFINE_ENUM(NFSERR_PERM);
TRACE_DEFINE_ENUM(NFSERR_NOENT);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bb80034a7661..cec3070ab577 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2160,8 +2160,6 @@ out_unlock:
return NULL;
}
-extern const nfs4_stateid current_stateid;
-
static void _lgopen_prepare_attached(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a84df7d63403..8d8d04bb9d64 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1592,7 +1592,7 @@ static int nfs_parse_mount_options(char *raw,
dfprintk(MOUNT, "NFS: invalid "
"lookupcache argument\n");
return 0;
- };
+ }
break;
case Opt_fscache_uniq:
if (nfs_get_option_str(args, &mnt->fscache_uniq))
@@ -1625,7 +1625,7 @@ static int nfs_parse_mount_options(char *raw,
dfprintk(MOUNT, "NFS: invalid "
"local_lock argument\n");
return 0;
- };
+ }
break;
/*
@@ -2585,7 +2585,7 @@ static void nfs_get_cache_cookie(struct super_block *sb,
if (mnt_s->fscache_key) {
uniq = mnt_s->fscache_key->key.uniquifier;
ulen = mnt_s->fscache_key->key.uniq_len;
- };
+ }
} else
return;
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 4f3390b20239..c489496b5659 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -121,8 +121,7 @@ static void nfs_netns_client_release(struct kobject *kobj)
struct nfs_netns_client,
kobject);
- if (c->identifier)
- kfree(c->identifier);
+ kfree(c->identifier);
kfree(c);
}
OpenPOWER on IntegriCloud