summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/backref.c17
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/extent-tree.c141
-rw-r--r--fs/btrfs/extent_io.c73
-rw-r--r--fs/btrfs/free-space-cache.c26
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/volumes.c16
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifs_unicode.c182
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c23
-rw-r--r--fs/cifs/connect.c3
-rw-r--r--fs/cifs/dir.c3
-rw-r--r--fs/cifs/file.c7
-rw-r--r--fs/cifs/inode.c31
-rw-r--r--fs/cifs/link.c3
-rw-r--r--fs/cifs/readdir.c2
-rw-r--r--fs/cifs/smb1ops.c3
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/dcache.c8
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext4/Kconfig9
-rw-r--r--fs/ext4/crypto_fname.c280
-rw-r--r--fs/ext4/crypto_key.c1
-rw-r--r--fs/ext4/crypto_policy.c14
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/ext4_crypto.h11
-rw-r--r--fs/ext4/ext4_jbd2.c6
-rw-r--r--fs/ext4/extents.c25
-rw-r--r--fs/ext4/extents_status.c8
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c72
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/f2fs/data.c7
-rw-r--r--fs/f2fs/f2fs.h1
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/fhandle.c5
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/revoke.c18
-rw-r--r--fs/jbd2/transaction.c25
-rw-r--r--fs/kernfs/dir.c9
-rw-r--r--fs/namei.c22
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfs/nfs4proc.c3
-rw-r--r--fs/nfs/write.c13
-rw-r--r--fs/nfsd/blocklayout.c11
-rw-r--r--fs/nfsd/nfs4callback.c119
-rw-r--r--fs/nfsd/nfs4state.c147
-rw-r--r--fs/nfsd/state.h19
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/nilfs2/btree.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c13
-rw-r--r--fs/omfs/bitmap.c2
-rw-r--r--fs/omfs/inode.c10
-rw-r--r--fs/overlayfs/copy_up.c3
-rw-r--r--fs/overlayfs/dir.c33
-rw-r--r--fs/overlayfs/super.c10
-rw-r--r--fs/splice.c12
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c9
-rw-r--r--fs/xfs/xfs_attr_inactive.c83
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_inode.c22
-rw-r--r--fs/xfs/xfs_mount.c34
77 files changed, 1141 insertions, 620 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 241ef68d2893..cd46e4158830 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
- error = -EINVAL;
+ retval = -EINVAL;
goto out_free_dentry;
}
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 9de772ee0031..614aaa1969bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
* indirect refs to their parent bytenr.
* When roots are found, they're added to the roots list
*
+ * NOTE: This can return values > 0
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return ret;
}
+/**
+ * btrfs_check_shared - tell us whether an extent is shared
+ *
+ * @trans: optional trans handle
+ *
+ * btrfs_check_shared uses the backref walking code but will short
+ * circuit as soon as it finds a root or inode that doesn't match the
+ * one passed in. This provides a significant performance benefit for
+ * callers (such as fiemap) which want to know whether the extent is
+ * shared but do not need a ref count.
+ *
+ * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
+ */
int btrfs_check_shared(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 root_objectid,
u64 inum, u64 bytenr)
@@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, root_objectid, inum);
if (ret == BACKREF_FOUND_SHARED) {
+ /* this is the only condition under which we return 1 */
ret = 1;
break;
}
if (ret < 0 && ret != -ENOENT)
break;
+ ret = 0;
node = ulist_next(tmp, &uiter);
if (!node)
break;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cde698a07d21..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+ BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..0ec3acd14cbf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3178,10 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
fail:
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_release_path(path);
return ret;
}
@@ -3305,8 +3303,7 @@ again:
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE) ||
- block_group->delalloc_bytes) {
+ !btrfs_test_opt(root, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
@@ -3408,17 +3405,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
int loops = 0;
spin_lock(&cur_trans->dirty_bgs_lock);
- if (!list_empty(&cur_trans->dirty_bgs)) {
- list_splice_init(&cur_trans->dirty_bgs, &dirty);
+ if (list_empty(&cur_trans->dirty_bgs)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ return 0;
}
+ list_splice_init(&cur_trans->dirty_bgs, &dirty);
spin_unlock(&cur_trans->dirty_bgs_lock);
again:
- if (list_empty(&dirty)) {
- btrfs_free_path(path);
- return 0;
- }
-
/*
* make sure all the block groups on our dirty list actually
* exist
@@ -3431,18 +3425,16 @@ again:
return -ENOMEM;
}
+ /*
+ * cache_write_mutex is here only to save us from balance or automatic
+ * removal of empty block groups deleting this block group while we are
+ * writing out the cache
+ */
+ mutex_lock(&trans->transaction->cache_write_mutex);
while (!list_empty(&dirty)) {
cache = list_first_entry(&dirty,
struct btrfs_block_group_cache,
dirty_list);
-
- /*
- * cache_write_mutex is here only to save us from balance
- * deleting this block group while we are writing out the
- * cache
- */
- mutex_lock(&trans->transaction->cache_write_mutex);
-
/*
* this can happen if something re-dirties a block
* group that is already under IO. Just wait for it to
@@ -3493,9 +3485,30 @@ again:
ret = 0;
}
}
- if (!ret)
+ if (!ret) {
ret = write_one_cache_group(trans, root, path, cache);
- mutex_unlock(&trans->transaction->cache_write_mutex);
+ /*
+ * Our block group might still be attached to the list
+ * of new block groups in the transaction handle of some
+ * other task (struct btrfs_trans_handle->new_bgs). This
+ * means its block group item isn't yet in the extent
+ * tree. If this happens ignore the error, as we will
+ * try again later in the critical section of the
+ * transaction commit.
+ */
+ if (ret == -ENOENT) {
+ ret = 0;
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ if (list_empty(&cache->dirty_list)) {
+ list_add_tail(&cache->dirty_list,
+ &cur_trans->dirty_bgs);
+ btrfs_get_block_group(cache);
+ }
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ } else if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ }
+ }
/* if its not on the io list, we need to put the block group */
if (should_put)
@@ -3503,7 +3516,16 @@ again:
if (ret)
break;
+
+ /*
+ * Avoid blocking other tasks for too long. It might even save
+ * us from writing caches for block groups that are going to be
+ * removed.
+ */
+ mutex_unlock(&trans->transaction->cache_write_mutex);
+ mutex_lock(&trans->transaction->cache_write_mutex);
}
+ mutex_unlock(&trans->transaction->cache_write_mutex);
/*
* go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3536,15 @@ again:
loops++;
spin_lock(&cur_trans->dirty_bgs_lock);
list_splice_init(&cur_trans->dirty_bgs, &dirty);
+ /*
+ * dirty_bgs_lock protects us from concurrent block group
+ * deletes too (not just cache_write_mutex).
+ */
+ if (!list_empty(&dirty)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ goto again;
+ }
spin_unlock(&cur_trans->dirty_bgs_lock);
- goto again;
}
btrfs_free_path(path);
@@ -3588,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
ret = 0;
}
}
- if (!ret)
+ if (!ret) {
ret = write_one_cache_group(trans, root, path, cache);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ }
/* if its not on the io list, we need to put the block group */
if (should_put)
@@ -7537,7 +7569,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
*/
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -7548,6 +7580,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
+ struct btrfs_delayed_extent_op *extent_op;
u64 flags = 0;
int ret;
u32 blocksize = root->nodesize;
@@ -7568,13 +7601,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
ret = btrfs_reserve_extent(root, blocksize, blocksize,
empty_size, hint, &ins, 0, 0);
- if (ret) {
- unuse_block_rsv(root->fs_info, block_rsv, blocksize);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto out_unuse;
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
- BUG_ON(IS_ERR(buf)); /* -ENOMEM */
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out_free_reserved;
+ }
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
@@ -7584,9 +7618,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
- struct btrfs_delayed_extent_op *extent_op;
extent_op = btrfs_alloc_delayed_extent_op();
- BUG_ON(!extent_op); /* -ENOMEM */
+ if (!extent_op) {
+ ret = -ENOMEM;
+ goto out_free_buf;
+ }
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
@@ -7601,13 +7637,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->level = level;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
- ins.objectid,
- ins.offset, parent, root_objectid,
- level, BTRFS_ADD_DELAYED_EXTENT,
- extent_op, 0);
- BUG_ON(ret); /* -ENOMEM */
+ ins.objectid, ins.offset,
+ parent, root_objectid, level,
+ BTRFS_ADD_DELAYED_EXTENT,
+ extent_op, 0);
+ if (ret)
+ goto out_free_delayed;
}
return buf;
+
+out_free_delayed:
+ btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+ free_extent_buffer(buf);
+out_free_reserved:
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+ unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+ return ERR_PTR(ret);
}
struct walk_control {
@@ -8782,6 +8829,24 @@ again:
goto again;
}
+ /*
+ * if we are changing raid levels, try to allocate a corresponding
+ * block group with the new raid level.
+ */
+ alloc_flags = update_block_group_flags(root, cache->flags);
+ if (alloc_flags != cache->flags) {
+ ret = do_chunk_alloc(trans, root, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ /*
+ * ENOSPC is allowed here, we may have enough space
+ * already allocated at the new raid level to
+ * carry on
+ */
+ if (ret == -ENOSPC)
+ ret = 0;
+ if (ret < 0)
+ goto out;
+ }
ret = set_block_group_ro(cache, 0);
if (!ret)
@@ -8795,7 +8860,9 @@ again:
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(root, cache->flags);
+ lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, root, alloc_flags);
+ unlock_chunks(root->fs_info->chunk_root);
}
mutex_unlock(&root->fs_info->ro_block_group_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 782f3bc4651d..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
do {
index--;
page = eb->pages[index];
- if (page && mapped) {
+ if (!page)
+ continue;
+ if (mapped)
spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
/*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear page_private if it's still connected to
- * this eb.
+ * We need to make sure we haven't be attached
+ * to a new eb.
*/
- if (PagePrivate(page) &&
- page->private == (unsigned long)eb) {
- BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(PageDirty(page));
- BUG_ON(PageWriteback(page));
- /*
- * We need to make sure we haven't be attached
- * to a new eb.
- */
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- page_cache_release(page);
- }
- spin_unlock(&page->mapping->private_lock);
-
- }
- if (page) {
- /* One for when we alloced the page */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
page_cache_release(page);
}
+
+ if (mapped)
+ spin_unlock(&page->mapping->private_lock);
+
+ /* One for when we alloced the page */
+ page_cache_release(page);
} while (index != 0);
}
@@ -4771,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
+ /*
+ * Lock our eb's refs_lock to avoid races with
+ * free_extent_buffer. When we get our eb it might be flagged
+ * with EXTENT_BUFFER_STALE and another task running
+ * free_extent_buffer might have seen that flag set,
+ * eb->refs == 2, that the buffer isn't under IO (dirty and
+ * writeback flags not set) and it's still in the tree (flag
+ * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+ * of decrementing the extent buffer's reference count twice.
+ * So here we could race and increment the eb's reference count,
+ * clear its stale flag, mark it as dirty and drop our reference
+ * before the other task finishes executing free_extent_buffer,
+ * which would later result in an attempt to free an extent
+ * buffer that is dirty.
+ */
+ if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+ spin_lock(&eb->refs_lock);
+ spin_unlock(&eb->refs_lock);
+ }
mark_extent_buffer_accessed(eb, NULL);
return eb;
}
@@ -4870,6 +4890,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
mark_extent_buffer_accessed(exists, p);
goto free_eb;
}
+ exists = NULL;
/*
* Do this so attach doesn't complain and we need to
@@ -4933,12 +4954,12 @@ again:
return eb;
free_eb:
+ WARN_ON(!atomic_dec_and_test(&eb->refs));
for (i = 0; i < num_pages; i++) {
if (eb->pages[i])
unlock_page(eb->pages[i]);
}
- WARN_ON(!atomic_dec_and_test(&eb->refs));
btrfs_release_extent_buffer(eb);
return exists;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81fa75a8e1f3..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
mapping_set_gfp_mask(inode->i_mapping,
mapping_gfp_mask(inode->i_mapping) &
- ~(GFP_NOFS & ~__GFP_HIGHMEM));
+ ~(__GFP_FS | __GFP_HIGHMEM));
return inode;
}
@@ -1218,7 +1218,7 @@ out:
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
*/
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
int must_iput = 0;
if (!i_size_read(inode))
- return -1;
+ return -EIO;
WARN_ON(io_ctl->pages);
ret = io_ctl_init(io_ctl, inode, root, 1);
if (ret)
- return -1;
+ return ret;
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
}
/* Lock all pages first so we can lock the extent safely. */
- io_ctl_prepare_pages(io_ctl, inode, 0);
+ ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+ if (ret)
+ goto out;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state);
@@ -3464,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
int ret;
struct btrfs_io_ctl io_ctl;
+ bool release_metadata = true;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
@@ -3471,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
memset(&io_ctl, 0, sizeof(io_ctl));
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
trans, path, 0);
- if (!ret)
+ if (!ret) {
+ /*
+ * At this point writepages() didn't error out, so our metadata
+ * reservation is released when the writeback finishes, at
+ * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+ * with or without an error.
+ */
+ release_metadata = false;
ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+ }
if (ret) {
- btrfs_delalloc_release_metadata(inode, inode->i_size);
+ if (release_metadata)
+ btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
btrfs_err(root->fs_info,
"failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada4d24ed11b..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3632,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
+ inode->i_version = btrfs_inode_sequence(leaf, inode_item);
+ inode->i_generation = BTRFS_I(inode)->generation;
+ inode->i_rdev = 0;
+ rdev = btrfs_inode_rdev(leaf, inode_item);
+
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+ BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
/*
* If we were modified in the current generation and evicted from memory
* and then re-read we need to do a full sync since we don't have any
* idea about which extents were modified before we were evicted from
* cache.
+ *
+ * This is required for both inode re-read from disk and delayed inode
+ * in delayed_nodes_tree.
*/
if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- inode->i_version = btrfs_inode_sequence(leaf, inode_item);
- inode->i_generation = BTRFS_I(inode)->generation;
- inode->i_rdev = 0;
- rdev = btrfs_inode_rdev(leaf, inode_item);
-
- BTRFS_I(inode)->index_cnt = (u64)-1;
- BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-cache_index:
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b05653f182c2..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
"Attempt to delete subvolume %llu during send",
dest->root_key.objectid);
err = -EPERM;
- goto out_dput;
+ goto out_unlock_inode;
}
d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
}
+out_unlock_inode:
mutex_unlock(&inode->i_mutex);
if (!err) {
shrink_dcache_sb(root->fs_info->sb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
int ret = 0;
+ int ret_wb = 0;
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (ret)
return ret;
- ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
- if (ret)
- return ret;
+ /*
+ * If we have a writeback error don't return immediately. Wait first
+ * for any ordered extents that haven't completed yet. This is to make
+ * sure no one can dirty the same page ranges and call writepages()
+ * before the ordered extents complete - to avoid failures (-EEXIST)
+ * when adding the new ordered extents to the ordered tree.
+ */
+ ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
end = orig_end;
while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
break;
end--;
}
- return ret;
+ return ret_wb ? ret_wb : ret;
}
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bcd2a007517..174f5e1e00ab 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
struct extent_map *em;
struct list_head *search_list = &trans->transaction->pending_chunks;
int ret = 0;
+ u64 physical_start = *start;
again:
list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev != device)
continue;
- if (map->stripes[i].physical >= *start + len ||
+ if (map->stripes[i].physical >= physical_start + len ||
map->stripes[i].physical + em->orig_block_len <=
- *start)
+ physical_start)
continue;
*start = map->stripes[i].physical +
em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
*/
if (contains_pending_extent(trans, device,
&search_start,
- hole_size))
- hole_size = 0;
+ hole_size)) {
+ if (key.offset >= search_start) {
+ hole_size = key.offset - search_start;
+ } else {
+ WARN_ON_ONCE(1);
+ hole_size = 0;
+ }
+ }
if (hole_size > max_hole_size) {
max_hole_start = search_start;
@@ -4618,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
{
u64 chunk_offset;
+ ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex));
chunk_offset = find_next_chunk(extent_root->fs_info);
return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 430e0348c99e..7dc886c9a78f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -24,6 +24,7 @@
#include "cifsfs.h"
#include "dns_resolve.h"
#include "cifs_debug.h"
+#include "cifs_unicode.h"
static LIST_HEAD(cifs_dfs_automount_list);
@@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
xid = get_xid();
rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
&num_referrals, &referrals,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
free_xid(xid);
cifs_put_tlink(tlink);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0303c6793d90..5a53ac6b1e02 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -27,41 +27,6 @@
#include "cifsglob.h"
#include "cifs_debug.h"
-/*
- * cifs_utf16_bytes - how long will a string be after conversion?
- * @utf16 - pointer to input string
- * @maxbytes - don't go past this many bytes of input string
- * @codepage - destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- */
-int
-cifs_utf16_bytes(const __le16 *from, int maxbytes,
- const struct nls_table *codepage)
-{
- int i;
- int charlen, outlen = 0;
- int maxwords = maxbytes / 2;
- char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
-
- for (i = 0; i < maxwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
- break;
-
- charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
- if (charlen > 0)
- outlen += charlen;
- else
- outlen++;
- }
-
- return outlen;
-}
-
int cifs_remap(struct cifs_sb_info *cifs_sb)
{
int map_type;
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target)
* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
*/
static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
int maptype)
{
int len = 1;
+ __u16 src_char;
+
+ src_char = *from;
if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
return len;
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
/* if character not one of seven in special remap set */
len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
- if (len <= 0) {
- *target = '?';
- len = 1;
- }
+ if (len <= 0)
+ goto surrogate_pair;
+
+ return len;
+
+surrogate_pair:
+ /* convert SURROGATE_PAIR and IVS */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+ if (len <= 0)
+ goto unknown;
+ return len;
+
+unknown:
+ *target = '?';
+ len = 1;
return len;
}
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
int nullsize = nls_nullsize(codepage);
int fromwords = fromlen / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
+ __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
/*
* because the chars can be of varying widths, we need to take care
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
for (i = 0; i < fromwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
break;
+ if (i + 1 < fromwords)
+ ftmp[1] = get_unaligned_le16(&from[i + 1]);
+ else
+ ftmp[1] = 0;
+ if (i + 2 < fromwords)
+ ftmp[2] = get_unaligned_le16(&from[i + 2]);
+ else
+ ftmp[2] = 0;
/*
* check to see if converting this character might make the
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
/* put converted char into 'to' buffer */
charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
outlen += charlen;
+
+ /* charlen (=bytes of UTF-8 for 1 character)
+ * 4bytes UTF-8(surrogate pair) is charlen=4
+ * (4bytes UTF-16 code)
+ * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+ * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
+ if (charlen == 4)
+ i++;
+ else if (charlen >= 5)
+ /* 5-6bytes UTF-8 */
+ i += 2;
}
/* properly null-terminate string */
@@ -296,6 +296,46 @@ success:
}
/*
+ * cifs_utf16_bytes - how long will a string be after conversion?
+ * @utf16 - pointer to input string
+ * @maxbytes - don't go past this many bytes of input string
+ * @codepage - destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ */
+int
+cifs_utf16_bytes(const __le16 *from, int maxbytes,
+ const struct nls_table *codepage)
+{
+ int i;
+ int charlen, outlen = 0;
+ int maxwords = maxbytes / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp[3];
+
+ for (i = 0; i < maxwords; i++) {
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
+ break;
+ if (i + 1 < maxwords)
+ ftmp[1] = get_unaligned_le16(&from[i + 1]);
+ else
+ ftmp[1] = 0;
+ if (i + 2 < maxwords)
+ ftmp[2] = get_unaligned_le16(&from[i + 2]);
+ else
+ ftmp[2] = 0;
+
+ charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
+ outlen += charlen;
+ }
+
+ return outlen;
+}
+
+/*
* cifs_strndup_from_utf16 - copy a string from wire format to the local
* codepage
* @src - source string
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
char src_char;
__le16 dst_char;
wchar_t tmp;
+ wchar_t *wchar_to; /* UTF-16 */
+ int ret;
+ unicode_t u;
if (map_chars == NO_MAP_UNI_RSVD)
return cifs_strtoUTF16(target, source, PATH_MAX, cp);
+ wchar_to = kzalloc(6, GFP_KERNEL);
+
for (i = 0; i < srclen; j++) {
src_char = source[i];
charlen = 1;
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
* if no match, use question mark, which at least in
* some cases serves as wild card
*/
- if (charlen < 1) {
- dst_char = cpu_to_le16(0x003f);
- charlen = 1;
+ if (charlen > 0)
+ goto ctoUTF16;
+
+ /* convert SURROGATE_PAIR */
+ if (strcmp(cp->charset, "utf8") || !wchar_to)
+ goto unknown;
+ if (*(source + i) & 0x80) {
+ charlen = utf8_to_utf32(source + i, 6, &u);
+ if (charlen < 0)
+ goto unknown;
+ } else
+ goto unknown;
+ ret = utf8s_to_utf16s(source + i, charlen,
+ UTF16_LITTLE_ENDIAN,
+ wchar_to, 6);
+ if (ret < 0)
+ goto unknown;
+
+ i += charlen;
+ dst_char = cpu_to_le16(*wchar_to);
+ if (charlen <= 3)
+ /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ else if (charlen == 4) {
+ /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+ * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+ * (charlen=3+4 or 4+4) */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ } else if (charlen >= 5) {
+ /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 2));
+ j++;
+ put_unaligned(dst_char, &target[j]);
}
+ continue;
+
+unknown:
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
}
+
+ctoUTF16:
/*
* character may take more than one byte in the source string,
* but will take exactly two bytes in the target string
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
ctoUTF16_out:
put_unaligned(0, &target[j]); /* Null terminate target unicode string */
+ kfree(wchar_to);
return j;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f5089bde3635..0a9fb6b53126 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",nouser_xattr");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
seq_puts(s, ",mapchars");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
+ seq_puts(s, ",mapposix");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
seq_puts(s, ",sfu");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c31ce98c1704..c63fd1dde25b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid,
extern int CIFSUnixCreateSymLink(const unsigned int xid,
struct cifs_tcon *tcon,
const char *fromName, const char *toName,
- const struct nls_table *nls_codepage);
+ const struct nls_table *nls_codepage, int remap);
extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
struct cifs_tcon *tcon,
const unsigned char *searchName, char **syminfo,
- const struct nls_table *nls_codepage);
+ const struct nls_table *nls_codepage, int remap);
extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, char **symlinkinfo,
const struct nls_table *nls_codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 84650a51c7c4..f26ffbfc64d8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2784,7 +2784,7 @@ copyRetry:
int
CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon,
const char *fromName, const char *toName,
- const struct nls_table *nls_codepage)
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -2804,9 +2804,9 @@ createSymLinkRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len =
- cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName,
- /* find define for this maxpathcomponent */
- PATH_MAX, nls_codepage);
+ cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName,
+ /* find define for this maxpathcomponent */
+ PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
@@ -2828,9 +2828,9 @@ createSymLinkRetry:
data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len_target =
- cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX
- /* find define for this maxpathcomponent */
- , nls_codepage);
+ cifsConvertToUTF16((__le16 *) data_offset, toName,
+ /* find define for this maxpathcomponent */
+ PATH_MAX, nls_codepage, remap);
name_len_target++; /* trailing null */
name_len_target *= 2;
} else { /* BB improve the check for buffer overruns BB */
@@ -3034,7 +3034,7 @@ winCreateHardLinkRetry:
int
CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
const unsigned char *searchName, char **symlinkinfo,
- const struct nls_table *nls_codepage)
+ const struct nls_table *nls_codepage, int remap)
{
/* SMB_QUERY_FILE_UNIX_LINK */
TRANSACTION2_QPI_REQ *pSMB = NULL;
@@ -3055,8 +3055,9 @@ querySymLinkRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len =
- cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName,
- PATH_MAX, nls_codepage);
+ cifsConvertToUTF16((__le16 *) pSMB->FileName,
+ searchName, PATH_MAX, nls_codepage,
+ remap);
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve the check for buffer overruns BB */
@@ -4917,7 +4918,7 @@ getDFSRetry:
strncpy(pSMB->RequestFileName, search_name, name_len);
}
- if (ses->server && ses->server->sign)
+ if (ses->server->sign)
pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
pSMB->hdr.Uid = ses->Suid;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f3bfe08e177b..8383d5ea4202 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
+ mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
atomic_inc(&tcpSesReconnectCount);
@@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&GlobalMid_Lock);
+ mutex_unlock(&server->srv_mutex);
}
- mutex_unlock(&server->srv_mutex);
} while (server->tcpStatus == CifsNeedReconnect);
return rc;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 338d56936f6a..c3eb998a99bd 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
}
rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
if (rc)
goto mknod_out;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cafbf10521d5..3f50cee79df9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
posix_flags = cifs_posix_convert_flags(f_flags);
rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
poplock, full_path, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
if (rc)
@@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
rc = server->ops->mand_unlock_range(cfile, flock, xid);
out:
- if (flock->fl_flags & FL_POSIX)
- posix_lock_file_wait(file, flock);
+ if (flock->fl_flags & FL_POSIX && !rc)
+ rc = posix_lock_file_wait(file, flock);
return rc;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 55b58112d122..f621b44cb800 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* could have done a find first instead but this returns more info */
rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
- cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_sb->local_nls, cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
if (!rc) {
@@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode,
rc = -ENOMEM;
} else {
/* we already have inode, update it */
+
+ /* if uniqueid is different, return error */
+ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+ CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+ rc = -ESTALE;
+ goto cgiiu_exit;
+ }
+
+ /* if filetype is different, return error */
+ if (unlikely(((*pinode)->i_mode & S_IFMT) !=
+ (fattr.cf_mode & S_IFMT))) {
+ rc = -ESTALE;
+ goto cgiiu_exit;
+ }
+
cifs_fattr_to_inode(*pinode, &fattr);
}
+cgiiu_exit:
return rc;
}
@@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
if (!*inode)
rc = -ENOMEM;
} else {
+ /* we already have inode, update it */
+
+ /* if filetype is different, return error */
+ if (unlikely(((*inode)->i_mode & S_IFMT) !=
+ (fattr.cf_mode & S_IFMT))) {
+ rc = -ESTALE;
+ goto cgii_exit;
+ }
+
cifs_fattr_to_inode(*inode, &fattr);
}
@@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
pTcon = tlink_tcon(tlink);
rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 252e672d5604..e6c707cc62b3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -717,7 +717,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname);
else if (pTcon->unix_ext)
rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname,
- cifs_sb->local_nls);
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
/* else
rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName,
cifs_sb_target->local_nls); */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b4a47237486b..b1eede3678a9 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
if (dentry) {
inode = d_inode(dentry);
if (inode) {
+ if (d_mountpoint(dentry))
+ goto out;
/*
* If we're generating inode numbers, then we don't
* want to clobber the existing one with the one that
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 7bfdd6066276..fc537c29044e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
/* Check for unix extensions */
if (cap_unix(tcon->ses)) {
rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
- cifs_sb->local_nls);
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
if (rc == -EREMOTE)
rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
target_path,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 65cd7a84c8bc..54cbe19d9c08 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
/* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
/* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
- if ((tcon->ses) &&
+ if ((tcon->ses) && (tcon->ses->server) &&
(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
hdr->CreditCharge = cpu_to_le16(1);
/* else CreditCharge MBZ */
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index da94e41bdbf6..537356742091 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -173,5 +173,5 @@ MODULE_LICENSE("GPL");
MODULE_VERSION("0.0.2");
MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
-module_init(configfs_init);
+core_initcall(configfs_init);
module_exit(configfs_exit);
diff --git a/fs/dcache.c b/fs/dcache.c
index 656ce522a218..37b5afdaf698 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1239,13 +1239,13 @@ ascend:
/* might go back up the wrong parent if we have had a rename. */
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
- next = child->d_child.next;
- while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
if (next == &this_parent->d_subdirs)
goto ascend;
child = list_entry(next, struct dentry, d_child);
- next = next->next;
- }
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
rcu_read_unlock();
goto resume;
}
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 59fedbcf8798..86a2121828c3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
int len, i;
int err = -ENOMEM;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return err;
diff --git a/fs/exec.c b/fs/exec.c
index 49a1c61433b7..1977c2a553ac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX;
+ /* Add space for stack randomization. */
+ stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
/* Make sure we didn't let the argument array grow too large. */
if (vma->vm_end - vma->vm_start > stack_base)
return -ENOMEM;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 18228c201f7f..024f2284d3f6 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,8 +64,8 @@ config EXT4_FS_SECURITY
If you are not using a security module that requires using
extended attributes for file security labels, say N.
-config EXT4_FS_ENCRYPTION
- bool "Ext4 Encryption"
+config EXT4_ENCRYPTION
+ tristate "Ext4 Encryption"
depends on EXT4_FS
select CRYPTO_AES
select CRYPTO_CBC
@@ -81,6 +81,11 @@ config EXT4_FS_ENCRYPTION
efficient since it avoids caching the encrypted and
decrypted pages in the page cache.
+config EXT4_FS_ENCRYPTION
+ bool
+ default y
+ depends on EXT4_ENCRYPTION
+
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index ca2f5948c1ac..fded02f72299 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -66,6 +66,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
struct scatterlist sg[1];
+ int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
char *workbuf;
if (iname->len <= 0 || iname->len > ctx->lim)
@@ -73,6 +74,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
EXT4_CRYPTO_BLOCK_SIZE : iname->len;
+ ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > ctx->lim)
? ctx->lim : ciphertext_len;
@@ -101,7 +103,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
/* Create encryption request */
sg_init_table(sg, 1);
sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
- ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+ ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
res = crypto_ablkcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
@@ -198,106 +200,57 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
return oname->len;
}
+static const char *lookup_table =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
/**
* ext4_fname_encode_digest() -
*
* Encodes the input digest using characters from the set [a-zA-Z0-9_+].
* The encoded string is roughly 4/3 times the size of the input string.
*/
-int ext4_fname_encode_digest(char *dst, char *src, u32 len)
+static int digest_encode(const char *src, int len, char *dst)
{
- static const char *lookup_table =
- "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
- u32 current_chunk, num_chunks, i;
- char tmp_buf[3];
- u32 c0, c1, c2, c3;
-
- current_chunk = 0;
- num_chunks = len/3;
- for (i = 0; i < num_chunks; i++) {
- c0 = src[3*i] & 0x3f;
- c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f;
- c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
- c3 = (src[3*i+2]>>2) & 0x3f;
- dst[4*i] = lookup_table[c0];
- dst[4*i+1] = lookup_table[c1];
- dst[4*i+2] = lookup_table[c2];
- dst[4*i+3] = lookup_table[c3];
- }
- if (i*3 < len) {
- memset(tmp_buf, 0, 3);
- memcpy(tmp_buf, &src[3*i], len-3*i);
- c0 = tmp_buf[0] & 0x3f;
- c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
- c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
- c3 = (tmp_buf[2]>>2) & 0x3f;
- dst[4*i] = lookup_table[c0];
- dst[4*i+1] = lookup_table[c1];
- dst[4*i+2] = lookup_table[c2];
- dst[4*i+3] = lookup_table[c3];
+ int i = 0, bits = 0, ac = 0;
+ char *cp = dst;
+
+ while (i < len) {
+ ac += (((unsigned char) src[i]) << bits);
+ bits += 8;
+ do {
+ *cp++ = lookup_table[ac & 0x3f];
+ ac >>= 6;
+ bits -= 6;
+ } while (bits >= 6);
i++;
}
- return (i * 4);
+ if (bits)
+ *cp++ = lookup_table[ac & 0x3f];
+ return cp - dst;
}
-/**
- * ext4_fname_hash() -
- *
- * This function computes the hash of the input filename, and sets the output
- * buffer to the *encoded* digest. It returns the length of the digest as its
- * return value. Errors are returned as negative numbers. We trust the caller
- * to allocate sufficient memory to oname string.
- */
-static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
- const struct ext4_str *iname,
- struct ext4_str *oname)
+static int digest_decode(const char *src, int len, char *dst)
{
- struct scatterlist sg;
- struct hash_desc desc = {
- .tfm = (struct crypto_hash *)ctx->htfm,
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
- int res = 0;
-
- if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
- res = ext4_fname_encode_digest(oname->name, iname->name,
- iname->len);
- oname->len = res;
- return res;
- }
-
- sg_init_one(&sg, iname->name, iname->len);
- res = crypto_hash_init(&desc);
- if (res) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; res = [%d]\n",
- __func__, res);
- goto out;
- }
- res = crypto_hash_update(&desc, &sg, iname->len);
- if (res) {
- printk(KERN_ERR
- "%s: Error updating crypto hash; res = [%d]\n",
- __func__, res);
- goto out;
- }
- res = crypto_hash_final(&desc,
- &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
- if (res) {
- printk(KERN_ERR
- "%s: Error finalizing crypto hash; res = [%d]\n",
- __func__, res);
- goto out;
+ int i = 0, bits = 0, ac = 0;
+ const char *p;
+ char *cp = dst;
+
+ while (i < len) {
+ p = strchr(lookup_table, src[i]);
+ if (p == NULL || src[i] == 0)
+ return -2;
+ ac += (p - lookup_table) << bits;
+ bits += 6;
+ if (bits >= 8) {
+ *cp++ = ac & 0xff;
+ ac >>= 8;
+ bits -= 8;
+ }
+ i++;
}
- /* Encode the digest as a printable string--this will increase the
- * size of the digest */
- oname->name[0] = 'I';
- res = ext4_fname_encode_digest(oname->name+1,
- &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
- EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
- oname->len = res;
-out:
- return res;
+ if (ac)
+ return -1;
+ return cp - dst;
}
/**
@@ -405,6 +358,7 @@ struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
if (IS_ERR(ctx))
return ctx;
+ ctx->flags = ei->i_crypt_policy_flags;
if (ctx->has_valid_key) {
if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
printk_once(KERN_WARNING
@@ -517,6 +471,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
u32 namelen)
{
u32 ciphertext_len;
+ int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
if (ctx == NULL)
return -EIO;
@@ -524,6 +479,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
return -EACCES;
ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
EXT4_CRYPTO_BLOCK_SIZE : namelen;
+ ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > ctx->lim)
? ctx->lim : ciphertext_len;
return (int) ciphertext_len;
@@ -539,10 +495,13 @@ int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
u32 ilen, struct ext4_str *crypto_str)
{
unsigned int olen;
+ int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
if (!ctx)
return -EIO;
- olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE);
+ if (padding < EXT4_CRYPTO_BLOCK_SIZE)
+ padding = EXT4_CRYPTO_BLOCK_SIZE;
+ olen = ext4_fname_crypto_round_up(ilen, padding);
crypto_str->len = olen;
if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
@@ -571,9 +530,13 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
* ext4_fname_disk_to_usr() - converts a filename from disk space to user space
*/
int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
- const struct ext4_str *iname,
- struct ext4_str *oname)
+ struct dx_hash_info *hinfo,
+ const struct ext4_str *iname,
+ struct ext4_str *oname)
{
+ char buf[24];
+ int ret;
+
if (ctx == NULL)
return -EIO;
if (iname->len < 3) {
@@ -587,18 +550,33 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
}
if (ctx->has_valid_key)
return ext4_fname_decrypt(ctx, iname, oname);
- else
- return ext4_fname_hash(ctx, iname, oname);
+
+ if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
+ ret = digest_encode(iname->name, iname->len, oname->name);
+ oname->len = ret;
+ return ret;
+ }
+ if (hinfo) {
+ memcpy(buf, &hinfo->hash, 4);
+ memcpy(buf+4, &hinfo->minor_hash, 4);
+ } else
+ memset(buf, 0, 8);
+ memcpy(buf + 8, iname->name + iname->len - 16, 16);
+ oname->name[0] = '_';
+ ret = digest_encode(buf, 24, oname->name+1);
+ oname->len = ret + 1;
+ return ret + 1;
}
int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ struct dx_hash_info *hinfo,
const struct ext4_dir_entry_2 *de,
struct ext4_str *oname)
{
struct ext4_str iname = {.name = (unsigned char *) de->name,
.len = de->name_len };
- return _ext4_fname_disk_to_usr(ctx, &iname, oname);
+ return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
}
@@ -640,10 +618,11 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
const struct qstr *iname,
struct dx_hash_info *hinfo)
{
- struct ext4_str tmp, tmp2;
+ struct ext4_str tmp;
int ret = 0;
+ char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
- if (!ctx || !ctx->has_valid_key ||
+ if (!ctx ||
((iname->name[0] == '.') &&
((iname->len == 1) ||
((iname->name[1] == '.') && (iname->len == 2))))) {
@@ -651,59 +630,90 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
return 0;
}
+ if (!ctx->has_valid_key && iname->name[0] == '_') {
+ if (iname->len != 33)
+ return -ENOENT;
+ ret = digest_decode(iname->name+1, iname->len, buf);
+ if (ret != 24)
+ return -ENOENT;
+ memcpy(&hinfo->hash, buf, 4);
+ memcpy(&hinfo->minor_hash, buf + 4, 4);
+ return 0;
+ }
+
+ if (!ctx->has_valid_key && iname->name[0] != '_') {
+ if (iname->len > 43)
+ return -ENOENT;
+ ret = digest_decode(iname->name, iname->len, buf);
+ ext4fs_dirhash(buf, ret, hinfo);
+ return 0;
+ }
+
/* First encrypt the plaintext name */
ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
if (ret < 0)
return ret;
ret = ext4_fname_encrypt(ctx, iname, &tmp);
- if (ret < 0)
- goto out;
-
- tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
- tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
- if (tmp2.name == NULL) {
- ret = -ENOMEM;
- goto out;
+ if (ret >= 0) {
+ ext4fs_dirhash(tmp.name, tmp.len, hinfo);
+ ret = 0;
}
- ret = ext4_fname_hash(ctx, &tmp, &tmp2);
- if (ret > 0)
- ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
- ext4_fname_crypto_free_buffer(&tmp2);
-out:
ext4_fname_crypto_free_buffer(&tmp);
return ret;
}
-/**
- * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string
- */
-int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
- const struct ext4_dir_entry_2 *de,
- struct dx_hash_info *hinfo)
+int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
+ int len, const char * const name,
+ struct ext4_dir_entry_2 *de)
{
- struct ext4_str iname = {.name = (unsigned char *) de->name,
- .len = de->name_len};
- struct ext4_str tmp;
- int ret;
+ int ret = -ENOENT;
+ int bigname = (*name == '_');
- if (!ctx ||
- ((iname.name[0] == '.') &&
- ((iname.len == 1) ||
- ((iname.name[1] == '.') && (iname.len == 2))))) {
- ext4fs_dirhash(iname.name, iname.len, hinfo);
- return 0;
+ if (ctx->has_valid_key) {
+ if (cstr->name == NULL) {
+ struct qstr istr;
+
+ ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
+ if (ret < 0)
+ goto errout;
+ istr.name = name;
+ istr.len = len;
+ ret = ext4_fname_encrypt(ctx, &istr, cstr);
+ if (ret < 0)
+ goto errout;
+ }
+ } else {
+ if (cstr->name == NULL) {
+ cstr->name = kmalloc(32, GFP_KERNEL);
+ if (cstr->name == NULL)
+ return -ENOMEM;
+ if ((bigname && (len != 33)) ||
+ (!bigname && (len > 43)))
+ goto errout;
+ ret = digest_decode(name+bigname, len-bigname,
+ cstr->name);
+ if (ret < 0) {
+ ret = -ENOENT;
+ goto errout;
+ }
+ cstr->len = ret;
+ }
+ if (bigname) {
+ if (de->name_len < 16)
+ return 0;
+ ret = memcmp(de->name + de->name_len - 16,
+ cstr->name + 8, 16);
+ return (ret == 0) ? 1 : 0;
+ }
}
-
- tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
- tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL);
- if (tmp.name == NULL)
- return -ENOMEM;
-
- ret = ext4_fname_hash(ctx, &iname, &tmp);
- if (ret > 0)
- ext4fs_dirhash(tmp.name, tmp.len, hinfo);
- ext4_fname_crypto_free_buffer(&tmp);
+ if (de->name_len != cstr->len)
+ return 0;
+ ret = memcmp(de->name, cstr->name, cstr->len);
+ return (ret == 0) ? 1 : 0;
+errout:
+ kfree(cstr->name);
+ cstr->name = NULL;
return ret;
}
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index c8392af8abbb..52170d0b7c40 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -110,6 +110,7 @@ int ext4_generate_encryption_key(struct inode *inode)
}
res = 0;
+ ei->i_crypt_policy_flags = ctx.flags;
if (S_ISREG(inode->i_mode))
crypt_key->mode = ctx.contents_encryption_mode;
else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 30eaf9e9864a..a6d6291aea16 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -37,6 +37,8 @@ static int ext4_is_encryption_context_consistent_with_policy(
return 0;
return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (ctx.flags ==
+ policy->flags) &&
(ctx.contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx.filenames_encryption_mode ==
@@ -56,25 +58,25 @@ static int ext4_create_encryption_context_from_policy(
printk(KERN_WARNING
"%s: Invalid contents encryption mode %d\n", __func__,
policy->contents_encryption_mode);
- res = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid filenames encryption mode %d\n", __func__,
policy->filenames_encryption_mode);
- res = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ if (policy->flags & ~EXT4_POLICY_FLAGS_VALID)
+ return -EINVAL;
ctx.contents_encryption_mode = policy->contents_encryption_mode;
ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
+ ctx.flags = policy->flags;
BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0);
-out:
if (!res)
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
return res;
@@ -115,6 +117,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
policy->version = 0;
policy->contents_encryption_mode = ctx.contents_encryption_mode;
policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
+ policy->flags = ctx.flags;
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE);
return 0;
@@ -176,6 +179,7 @@ int ext4_inherit_context(struct inode *parent, struct inode *child)
EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
+ ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42,
EXT4_KEY_DESCRIPTOR_SIZE);
res = 0;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 61db51a5ce4c..5665d82d2332 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -249,7 +249,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
} else {
/* Directory is encrypted */
err = ext4_fname_disk_to_usr(enc_ctx,
- de, &fname_crypto_str);
+ NULL, de, &fname_crypto_str);
if (err < 0)
goto errout;
if (!dir_emit(ctx,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ef267adce19a..9a83f149ac85 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -911,6 +911,7 @@ struct ext4_inode_info {
/* on-disk additional length */
__u16 i_extra_isize;
+ char i_crypt_policy_flags;
/* Indicate the inline data space. */
u16 i_inline_off;
@@ -1066,12 +1067,6 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
-/* Encryption algorithms */
-#define EXT4_ENCRYPTION_MODE_INVALID 0
-#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
-#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
-#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
-
/*
* Structure of the super block
*/
@@ -2093,9 +2088,11 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
u32 ilen, struct ext4_str *crypto_str);
int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ struct dx_hash_info *hinfo,
const struct ext4_str *iname,
struct ext4_str *oname);
int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ struct dx_hash_info *hinfo,
const struct ext4_dir_entry_2 *de,
struct ext4_str *oname);
int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
@@ -2104,11 +2101,12 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
const struct qstr *iname,
struct dx_hash_info *hinfo);
-int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
- const struct ext4_dir_entry_2 *de,
- struct dx_hash_info *hinfo);
int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
u32 namelen);
+int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
+ int len, const char * const name,
+ struct ext4_dir_entry_2 *de);
+
#ifdef CONFIG_EXT4_FS_ENCRYPTION
void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
@@ -2891,7 +2889,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
ext4_lblk_t lblocks);
-extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
struct ext4_ext_path *path);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index c2ba35a914b6..d75159c101ce 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -20,12 +20,20 @@ struct ext4_encryption_policy {
char version;
char contents_encryption_mode;
char filenames_encryption_mode;
+ char flags;
char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
} __attribute__((__packed__));
#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
#define EXT4_KEY_DERIVATION_NONCE_SIZE 16
+#define EXT4_POLICY_FLAGS_PAD_4 0x00
+#define EXT4_POLICY_FLAGS_PAD_8 0x01
+#define EXT4_POLICY_FLAGS_PAD_16 0x02
+#define EXT4_POLICY_FLAGS_PAD_32 0x03
+#define EXT4_POLICY_FLAGS_PAD_MASK 0x03
+#define EXT4_POLICY_FLAGS_VALID 0x03
+
/**
* Encryption context for inode
*
@@ -41,7 +49,7 @@ struct ext4_encryption_context {
char format;
char contents_encryption_mode;
char filenames_encryption_mode;
- char reserved;
+ char flags;
char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
} __attribute__((__packed__));
@@ -120,6 +128,7 @@ struct ext4_fname_crypto_ctx {
struct crypto_hash *htfm;
struct page *workpage;
struct ext4_encryption_key key;
+ unsigned flags : 8;
unsigned has_valid_key : 1;
unsigned ctfm_key_is_ready : 1;
};
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3445035c7e01..d41843181818 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
ext4_put_nojournal(handle);
return 0;
}
+
+ if (!handle->h_transaction) {
+ err = jbd2_journal_stop(handle);
+ return handle->h_err ? handle->h_err : err;
+ }
+
sb = handle->h_transaction->t_journal->j_private;
err = handle->h_err;
rc = jbd2_journal_stop(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 973816bfe4a9..e003a1e81dc3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
ext4_lblk_t last = lblock + len - 1;
- if (lblock > last)
+ if (len == 0 || lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -4927,13 +4927,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret)
return ret;
- /*
- * currently supporting (pre)allocate mode for extent-based
- * files _only_
- */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return -EOPNOTSUPP;
-
if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);
@@ -4955,6 +4948,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
mutex_lock(&inode->i_mutex);
+ /*
+ * We only support preallocation for extent-based files only
+ */
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
@@ -5395,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
loff_t new_size, ioffset;
int ret;
+ /*
+ * We need to test this early because xfstests assumes that a
+ * collapse range of (0, 1) will return EOPNOTSUPP if the file
+ * system does not support collapse range.
+ */
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ return -EOPNOTSUPP;
+
/* Collapse range works only on fs block size aligned offsets. */
if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
len & (EXT4_CLUSTER_SIZE(sb) - 1))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d33d5a6852b9..26724aeece73 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -703,6 +703,14 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
BUG_ON(end < lblk);
+ if ((status & EXTENT_STATUS_DELAYED) &&
+ (status & EXTENT_STATUS_WRITTEN)) {
+ ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
+ " delayed and written which can potentially "
+ " cause data loss.\n", lblk, len);
+ WARN_ON(1);
+ }
+
newes.es_lblk = lblk;
newes.es_len = len;
ext4_es_store_pblock_status(&newes, pblk, status);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbd0654a2675..0554b0b5957b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -531,6 +531,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+ !(status & EXTENT_STATUS_WRITTEN) &&
ext4_find_delalloc_range(inode, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
@@ -635,6 +636,7 @@ found:
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+ !(status & EXTENT_STATUS_WRITTEN) &&
ext4_find_delalloc_range(inode, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
@@ -4343,7 +4345,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
int inode_size = EXT4_INODE_SIZE(sb);
oi.orig_ino = orig_ino;
- ino = orig_ino & ~(inodes_per_block - 1);
+ ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino)
continue;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7223b0b4bc38..814f3beb4369 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -640,7 +640,7 @@ static struct stats dx_show_leaf(struct inode *dir,
ext4_put_fname_crypto_ctx(&ctx);
ctx = NULL;
}
- res = ext4_fname_disk_to_usr(ctx, de,
+ res = ext4_fname_disk_to_usr(ctx, NULL, de,
&fname_crypto_str);
if (res < 0) {
printk(KERN_WARNING "Error "
@@ -653,15 +653,8 @@ static struct stats dx_show_leaf(struct inode *dir,
name = fname_crypto_str.name;
len = fname_crypto_str.len;
}
- res = ext4_fname_disk_to_hash(ctx, de,
- &h);
- if (res < 0) {
- printk(KERN_WARNING "Error "
- "converting filename "
- "from disk to htree"
- "\n");
- h.hash = 0xDEADBEEF;
- }
+ ext4fs_dirhash(de->name, de->name_len,
+ &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
@@ -1008,15 +1001,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */
break;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- err = ext4_fname_disk_to_hash(ctx, de, hinfo);
- if (err < 0) {
- count = err;
- goto errout;
- }
-#else
ext4fs_dirhash(de->name, de->name_len, hinfo);
-#endif
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
@@ -1032,7 +1017,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
&tmp_str);
} else {
/* Directory is encrypted */
- err = ext4_fname_disk_to_usr(ctx, de,
+ err = ext4_fname_disk_to_usr(ctx, hinfo, de,
&fname_crypto_str);
if (err < 0) {
count = err;
@@ -1193,26 +1178,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
int count = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- struct ext4_fname_crypto_ctx *ctx = NULL;
- int err;
-
- ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-#endif
while ((char *) de < base + blocksize) {
if (de->name_len && de->inode) {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- err = ext4_fname_disk_to_hash(ctx, de, &h);
- if (err < 0) {
- ext4_put_fname_crypto_ctx(&ctx);
- return err;
- }
-#else
ext4fs_dirhash(de->name, de->name_len, &h);
-#endif
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1223,9 +1192,6 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
/* XXX: do we need to check rec_len == 0 case? -Chris */
de = ext4_next_entry(de, blocksize);
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- ext4_put_fname_crypto_ctx(&ctx);
-#endif
return count;
}
@@ -1287,16 +1253,8 @@ static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
return 0;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
- if (ctx) {
- /* Directory is encrypted */
- res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
- if (res < 0)
- return res;
- if (len != res)
- return 0;
- res = memcmp(name, fname_crypto_str->name, len);
- return (res == 0) ? 1 : 0;
- }
+ if (ctx)
+ return ext4_fname_match(ctx, fname_crypto_str, len, name, de);
#endif
if (len != de->name_len)
return 0;
@@ -1324,16 +1282,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
if (IS_ERR(ctx))
return -1;
- if (ctx != NULL) {
- /* Allocate buffer to hold maximum name length */
- res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
- &fname_crypto_str);
- if (res < 0) {
- ext4_put_fname_crypto_ctx(&ctx);
- return -1;
- }
- }
-
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
@@ -1872,14 +1820,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return res;
}
reclen = EXT4_DIR_REC_LEN(res);
-
- /* Allocate buffer to hold maximum name length */
- res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
- &fname_crypto_str);
- if (res < 0) {
- ext4_put_fname_crypto_ctx(&ctx);
- return -1;
- }
}
de = (struct ext4_dir_entry_2 *)buf;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8a8ec6293b19..cf0c472047e3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1432,12 +1432,15 @@ static int ext4_flex_group_add(struct super_block *sb,
goto exit;
/*
* We will always be modifying at least the superblock and GDT
- * block. If we are adding a group past the last current GDT block,
+ * blocks. If we are adding a group past the last current GDT block,
* we will also modify the inode and the dindirect block. If we
* are adding a group with superblock/GDT backups we will also
* modify each of the reserved GDT dindirect blocks.
*/
- credit = flex_gd->count * 4 + reserved_gdb;
+ credit = 3; /* sb, resize inode, resize inode dindirect */
+ /* GDT blocks */
+ credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
+ credit += reserved_gdb; /* Reserved GDT dindirect blocks */
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f06d0589ddba..ca9d4a2fed41 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (bdev_read_only(sb->s_bdev))
+ return;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
es->s_last_error_time = cpu_to_le32(get_seconds());
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 19f78f20975e..187b78920314 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -74,7 +74,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
goto errout;
}
pstr.name = paddr;
- res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr);
+ res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
if (res < 0)
goto errout;
/* Null-terminate the name */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b91b0e10678e..1e1aae669fa8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ bool locked = false;
int ret;
long diff;
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
diff = nr_pages_to_write(sbi, DATA, wbc);
+ if (!S_ISDIR(inode->i_mode)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d8921cf2ba9a..8de34ab6d5b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
+ struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e3794edae42..658e8079aaf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -298,16 +298,14 @@ fail:
static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct page *page;
+ struct page *page = page_follow_link_light(dentry, nd);
- page = page_follow_link_light(dentry, nd);
- if (IS_ERR(page))
+ if (IS_ERR_OR_NULL(page))
return page;
/* this is broken symlink case */
if (*nd_get_link(nd) == 0) {
- kunmap(page);
- page_cache_release(page);
+ page_put_link(dentry, nd, page);
return ERR_PTR(-ENOENT);
}
return page;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 160b88346b24..b2dd1b01f076 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1035,6 +1035,7 @@ try_onemore:
sbi->raw_super = raw_super;
sbi->raw_super_buf = raw_super_buf;
mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
clear_sbi_flag(sbi, SBI_POR_DOING);
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 999ff5c3cab0..d59712dfa3e7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -195,8 +195,9 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
goto out_err;
}
/* copy the full handle */
- if (copy_from_user(handle, ufh,
- sizeof(struct file_handle) +
+ *handle = f_handle;
+ if (copy_from_user(&handle->f_handle,
+ &ufh->f_handle,
f_handle.handle_bytes)) {
retval = -EFAULT;
goto out_handle;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ef263174acd2..07d8d8f52faf 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -581,7 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (name == NULL)
goto out_put;
- fd = file_create(name, mode & S_IFMT);
+ fd = file_create(name, mode & 0777);
if (fd < 0)
error = fd;
else
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b5128c6e63ad..a9079d035ae5 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
{
jbd2_journal_revoke_header_t *header;
int offset, max;
+ int csum_size = 0;
+ __u32 rcount;
int record_len = 4;
header = (jbd2_journal_revoke_header_t *) bh->b_data;
offset = sizeof(jbd2_journal_revoke_header_t);
- max = be32_to_cpu(header->r_count);
+ rcount = be32_to_cpu(header->r_count);
if (!jbd2_revoke_block_csum_verify(journal, header))
return -EINVAL;
+ if (jbd2_journal_has_csum_v2or3(journal))
+ csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ if (rcount > journal->j_blocksize - csum_size)
+ return -EINVAL;
+ max = rcount;
+
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
record_len = 8;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index c6cbaef2bda1..14214da80eb8 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal,
{
int csum_size = 0;
struct buffer_head *descriptor;
- int offset;
+ int sz, offset;
journal_header_t *header;
/* If we are already aborting, this all becomes a noop. We
@@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal,
if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+ sz = 8;
+ else
+ sz = 4;
+
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
- if (offset >= journal->j_blocksize - csum_size) {
+ if (offset + sz > journal->j_blocksize - csum_size) {
flush_descriptor(journal, descriptor, offset, write_op);
descriptor = NULL;
}
@@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal,
*descriptorp = descriptor;
}
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
* ((__be64 *)(&descriptor->b_data[offset])) =
cpu_to_be64(record->blocknr);
- offset += 8;
-
- } else {
+ else
* ((__be32 *)(&descriptor->b_data[offset])) =
cpu_to_be32(record->blocknr);
- offset += 4;
- }
+ offset += sz;
*offsetp = offset;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 5f09370c90a8..ff2f2e6ad311 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
int result;
int wanted;
- WARN_ON(!transaction);
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
@@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
tid_t tid;
int need_to_start, ret;
- WARN_ON(!transaction);
/* If we've had an abort of any type, don't even think about
* actually doing the restart! */
if (is_handle_aborted(handle))
@@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
int need_copy = 0;
unsigned long start_lock, time_lock;
- WARN_ON(!transaction);
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
@@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
int err;
jbd_debug(5, "journal_head %p\n", jh);
- WARN_ON(!transaction);
err = -EROFS;
if (is_handle_aborted(handle))
goto out;
@@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
int ret = 0;
- WARN_ON(!transaction);
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
@@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
int err = 0;
int was_modified = 0;
- WARN_ON(!transaction);
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
@@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle)
tid_t tid;
pid_t pid;
- if (!transaction)
- goto free_and_exit;
+ if (!transaction) {
+ /*
+ * Handle is already detached from the transaction so
+ * there is nothing to do other than decrease a refcount,
+ * or free the handle if refcount drops to zero
+ */
+ if (--handle->h_ref > 0) {
+ jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+ handle->h_ref);
+ return err;
+ } else {
+ if (handle->h_rsv_handle)
+ jbd2_free_handle(handle->h_rsv_handle);
+ goto free_and_exit;
+ }
+ }
journal = transaction->t_journal;
J_ASSERT(journal_current_handle() == handle);
@@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
- WARN_ON(!transaction);
if (is_handle_aborted(handle))
return -EROFS;
journal = transaction->t_journal;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f131fc23ffc4..fffca9517321 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
if (!kn)
goto err_out1;
- ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+ /*
+ * If the ino of the sysfs entry created for a kmem cache gets
+ * allocated from an ida layer, which is accounted to the memcg that
+ * owns the cache, the memcg will get pinned forever. So do not account
+ * ino ida allocations.
+ */
+ ret = ida_simple_get(&root->ino_ida, 1, 0,
+ GFP_KERNEL | __GFP_NOACCOUNT);
if (ret < 0)
goto err_out2;
kn->ino = ret;
diff --git a/fs/namei.c b/fs/namei.c
index 4a8d998b7274..fe30d3be43a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1415,6 +1415,7 @@ static int lookup_fast(struct nameidata *nd,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
+ bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (!dentry)
goto unlazy;
@@ -1424,8 +1425,11 @@ static int lookup_fast(struct nameidata *nd,
* the dentry name information from lookup.
*/
*inode = dentry->d_inode;
+ negative = d_is_negative(dentry);
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;
+ if (negative)
+ return -ENOENT;
/*
* This sequence count validates that the parent had no
@@ -1472,6 +1476,10 @@ unlazy:
goto need_lookup;
}
+ if (unlikely(d_is_negative(dentry))) {
+ dput(dentry);
+ return -ENOENT;
+ }
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
@@ -1583,10 +1591,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
goto out_err;
inode = path->dentry->d_inode;
+ err = -ENOENT;
+ if (d_is_negative(path->dentry))
+ goto out_path_put;
}
- err = -ENOENT;
- if (d_is_negative(path->dentry))
- goto out_path_put;
if (should_follow_link(path->dentry, follow)) {
if (nd->flags & LOOKUP_RCU) {
@@ -3036,14 +3044,13 @@ retry_lookup:
BUG_ON(nd->flags & LOOKUP_RCU);
inode = path->dentry->d_inode;
-finish_lookup:
- /* we _can_ be in RCU mode here */
error = -ENOENT;
if (d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}
-
+finish_lookup:
+ /* we _can_ be in RCU mode here */
if (should_follow_link(path->dentry, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(nd->path.mnt != path->mnt ||
@@ -3226,7 +3233,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
if (unlikely(file->f_flags & __O_TMPFILE)) {
error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
- goto out;
+ goto out2;
}
error = path_init(dfd, pathname, flags, nd);
@@ -3256,6 +3263,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
}
out:
path_cleanup(nd);
+out2:
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1f4f9dac6e5a..1b9e11167bae 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3179,6 +3179,12 @@ bool fs_fully_visible(struct file_system_type *type)
if (mnt->mnt.mnt_sb->s_type != type)
continue;
+ /* This mount is not fully visible if it's root directory
+ * is not the root directory of the filesystem.
+ */
+ if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+ continue;
+
/* This mount is not fully visible if there are any child mounts
* that cover anything except for empty directories.
*/
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 45b35b9b1e36..55e1e3af23a3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,6 +38,7 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/errno.h>
+#include <linux/file.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <linux/printk.h>
@@ -5604,6 +5605,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->server = server;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
+ get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
out_free_seqid:
@@ -5716,6 +5718,7 @@ static void nfs4_lock_release(void *calldata)
nfs_free_seqid(data->arg.lock_seqid);
nfs4_put_lock_state(data->lsp);
put_nfs_open_context(data->ctx);
+ fput(data->fl.fl_file);
kfree(data);
dprintk("%s: done!\n", __func__);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d12a4be613a5..dfc19f1575a1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1845,12 +1845,15 @@ int nfs_wb_all(struct inode *inode)
trace_nfs_writeback_inode_enter(inode);
ret = filemap_write_and_wait(inode->i_mapping);
- if (!ret) {
- ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (!ret)
- pnfs_sync_inode(inode, true);
- }
+ if (ret)
+ goto out;
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ goto out;
+ pnfs_sync_inode(inode, true);
+ ret = 0;
+out:
trace_nfs_writeback_inode_exit(inode, ret);
return ret;
}
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647bf195d..cdefaa331a07 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
}
const struct nfsd4_layout_ops bl_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
.proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
.encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
.proc_layoutget = nfsd4_block_proc_layoutget,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 58277859a467..5694cfb7a47b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
}
static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
- enum nfsstat4 *status)
+ int *status)
{
__be32 *p;
u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
op = be32_to_cpup(p++);
if (unlikely(op != expected))
goto out_unexpected;
- *status = be32_to_cpup(p);
+ *status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
static int decode_cb_sequence4res(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
- enum nfsstat4 nfserr;
int status;
if (cb->cb_minorversion == 0)
return 0;
- status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
- if (unlikely(status))
- goto out;
- if (unlikely(nfserr != NFS4_OK))
- goto out_default;
- status = decode_cb_sequence4resok(xdr, cb);
-out:
- return status;
-out_default:
- return nfs_cb_stat_to_errno(nfserr);
+ status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
+ if (unlikely(status || cb->cb_status))
+ return status;
+
+ return decode_cb_sequence4resok(xdr, cb);
}
/*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
struct nfsd4_callback *cb)
{
struct nfs4_cb_compound_hdr hdr;
- enum nfsstat4 nfserr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
- goto out;
+ return status;
if (cb != NULL) {
status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status))
- goto out;
+ if (unlikely(status || cb->cb_status))
+ return status;
}
- status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
- if (unlikely(status))
- goto out;
- if (unlikely(nfserr != NFS4_OK))
- status = nfs_cb_stat_to_errno(nfserr);
-out:
- return status;
+ return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
#ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
struct nfsd4_callback *cb)
{
struct nfs4_cb_compound_hdr hdr;
- enum nfsstat4 nfserr;
int status;
status = decode_cb_compound4res(xdr, &hdr);
if (unlikely(status))
- goto out;
+ return status;
+
if (cb) {
status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status))
- goto out;
+ if (unlikely(status || cb->cb_status))
+ return status;
}
- status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
- if (unlikely(status))
- goto out;
- if (unlikely(nfserr != NFS4_OK))
- status = nfs_cb_stat_to_errno(nfserr);
-out:
- return status;
+ return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
@@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
if (!nfsd41_cb_get_slot(clp, task))
return;
}
- spin_lock(&clp->cl_lock);
- if (list_empty(&cb->cb_per_client)) {
- /* This is the first call, not a restart */
- cb->cb_done = false;
- list_add(&cb->cb_per_client, &clp->cl_callbacks);
- }
- spin_unlock(&clp->cl_lock);
rpc_call_start(task);
}
@@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
if (clp->cl_minorversion) {
/* No need for lock, access serialized in nfsd4_cb_prepare */
- ++clp->cl_cb_session->se_cb_seq_nr;
+ if (!task->tk_status)
+ ++clp->cl_cb_session->se_cb_seq_nr;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
}
- if (clp->cl_cb_client != task->tk_client) {
- /* We're shutting down or changing cl_cb_client; leave
- * it to nfsd4_process_cb_update to restart the call if
- * necessary. */
+ /*
+ * If the backchannel connection was shut down while this
+ * task was queued, we need to resubmit it after setting up
+ * a new backchannel connection.
+ *
+ * Note that if we lost our callback connection permanently
+ * the submission code will error out, so we don't need to
+ * handle that case here.
+ */
+ if (task->tk_flags & RPC_TASK_KILLED) {
+ task->tk_status = 0;
+ cb->cb_need_restart = true;
return;
}
- if (cb->cb_done)
- return;
+ if (cb->cb_status) {
+ WARN_ON_ONCE(task->tk_status);
+ task->tk_status = cb->cb_status;
+ }
switch (cb->cb_ops->done(cb, task)) {
case 0:
@@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
default:
BUG();
}
- cb->cb_done = true;
}
static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
- struct nfs4_client *clp = cb->cb_clp;
-
- if (cb->cb_done) {
- spin_lock(&clp->cl_lock);
- list_del(&cb->cb_per_client);
- spin_unlock(&clp->cl_lock);
+ if (cb->cb_need_restart)
+ nfsd4_run_cb(cb);
+ else
cb->cb_ops->release(cb);
- }
+
}
static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
nfsd4_mark_cb_down(clp, err);
return;
}
- /* Yay, the callback channel's back! Restart any callbacks: */
- list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
- queue_work(callback_wq, &cb->cb_work);
}
static void
@@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
- if (cb->cb_ops && cb->cb_ops->prepare)
- cb->cb_ops->prepare(cb);
+ if (cb->cb_need_restart) {
+ cb->cb_need_restart = false;
+ } else {
+ if (cb->cb_ops && cb->cb_ops->prepare)
+ cb->cb_ops->prepare(cb);
+ }
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
@@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
cb->cb_ops->release(cb);
return;
}
+
+ /*
+ * Don't send probe messages for 4.1 or later.
+ */
+ if (!cb->cb_ops && clp->cl_minorversion) {
+ clp->cl_cb_state = NFSD4_CB_UP;
+ return;
+ }
+
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
@@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
- INIT_LIST_HEAD(&cb->cb_per_client);
- cb->cb_done = true;
+ cb->cb_status = 0;
+ cb->cb_need_restart = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 38f2d7abe3a7..039f9c8a95e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab;
static struct kmem_cache *stateid_slab;
static struct kmem_cache *deleg_slab;
+static struct kmem_cache *odstate_slab;
static void free_session(struct nfsd4_session *);
@@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi)
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
+ WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
}
@@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
__nfs4_file_put_access(fp, O_RDONLY);
}
+/*
+ * Allocate a new open/delegation state counter. This is needed for
+ * pNFS for proper return on close semantics.
+ *
+ * Note that we only allocate it for pNFS-enabled exports, otherwise
+ * all pointers to struct nfs4_clnt_odstate are always NULL.
+ */
+static struct nfs4_clnt_odstate *
+alloc_clnt_odstate(struct nfs4_client *clp)
+{
+ struct nfs4_clnt_odstate *co;
+
+ co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
+ if (co) {
+ co->co_client = clp;
+ atomic_set(&co->co_odcount, 1);
+ }
+ return co;
+}
+
+static void
+hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
+{
+ struct nfs4_file *fp = co->co_file;
+
+ lockdep_assert_held(&fp->fi_lock);
+ list_add(&co->co_perfile, &fp->fi_clnt_odstate);
+}
+
+static inline void
+get_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+ if (co)
+ atomic_inc(&co->co_odcount);
+}
+
+static void
+put_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+ struct nfs4_file *fp;
+
+ if (!co)
+ return;
+
+ fp = co->co_file;
+ if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+ list_del(&co->co_perfile);
+ spin_unlock(&fp->fi_lock);
+
+ nfsd4_return_all_file_layouts(co->co_client, fp);
+ kmem_cache_free(odstate_slab, co);
+ }
+}
+
+static struct nfs4_clnt_odstate *
+find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
+{
+ struct nfs4_clnt_odstate *co;
+ struct nfs4_client *cl;
+
+ if (!new)
+ return NULL;
+
+ cl = new->co_client;
+
+ spin_lock(&fp->fi_lock);
+ list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
+ if (co->co_client == cl) {
+ get_clnt_odstate(co);
+ goto out;
+ }
+ }
+ co = new;
+ co->co_file = fp;
+ hash_clnt_odstate_locked(new);
+out:
+ spin_unlock(&fp->fi_lock);
+ return co;
+}
+
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
struct kmem_cache *slab)
{
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
}
static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
+alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+ struct nfs4_clnt_odstate *odstate)
{
struct nfs4_delegation *dp;
long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
+ dp->dl_clnt_odstate = odstate;
+ get_clnt_odstate(odstate);
dp->dl_type = NFS4_OPEN_DELEGATE_READ;
dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock);
unhash_delegation_locked(dp);
spin_unlock(&state_lock);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru));
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
+ put_clnt_odstate(stp->st_clnt_odstate);
release_all_access(stp);
if (stp->st_stateowner)
nfs4_put_stateowner(stp->st_stateowner);
@@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_lru);
- INIT_LIST_HEAD(&clp->cl_callbacks);
INIT_LIST_HEAD(&clp->cl_revoked);
#ifdef CONFIG_NFSD_PNFS
INIT_LIST_HEAD(&clp->cl_lo_states);
@@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
@@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
+ INIT_LIST_HEAD(&fp->fi_clnt_odstate);
fh_copy_shallow(&fp->fi_fhandle, fh);
fp->fi_deleg_file = NULL;
fp->fi_had_conflict = false;
@@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
void
nfsd4_free_slabs(void)
{
+ kmem_cache_destroy(odstate_slab);
kmem_cache_destroy(openowner_slab);
kmem_cache_destroy(lockowner_slab);
kmem_cache_destroy(file_slab);
@@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void)
sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL)
goto out_free_stateid_slab;
+ odstate_slab = kmem_cache_create("nfsd4_odstate",
+ sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+ if (odstate_slab == NULL)
+ goto out_free_deleg_slab;
return 0;
+out_free_deleg_slab:
+ kmem_cache_destroy(deleg_slab);
out_free_stateid_slab:
kmem_cache_destroy(stateid_slab);
out_free_file_slab:
@@ -3581,6 +3677,14 @@ alloc_stateid:
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
+
+ if (nfsd4_has_session(cstate) &&
+ (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
+ open->op_odstate = alloc_clnt_odstate(clp);
+ if (!open->op_odstate)
+ return nfserr_jukebox;
+ }
+
return nfs_ok;
}
@@ -3869,7 +3973,7 @@ out_fput:
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
- struct nfs4_file *fp)
+ struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
{
int status;
struct nfs4_delegation *dp;
@@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- dp = alloc_init_deleg(clp, fh);
+ dp = alloc_init_deleg(clp, fh, odstate);
if (!dp)
return ERR_PTR(-ENOMEM);
@@ -3903,6 +4007,7 @@ out_unlock:
spin_unlock(&state_lock);
out:
if (status) {
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
return ERR_PTR(status);
}
@@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
default:
goto out_no_deleg;
}
- dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
+ dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
if (IS_ERR(dp))
goto out_no_deleg;
@@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
release_open_stateid(stp);
goto out;
}
+
+ stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
+ open->op_odstate);
+ if (stp->st_clnt_odstate == open->op_odstate)
+ open->op_odstate = NULL;
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
nfs4_put_stid(&open->op_stp->st_stid);
+ if (open->op_odstate)
+ kmem_cache_free(odstate_slab, open->op_odstate);
}
__be32
@@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
return nfserr_old_stateid;
}
+static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
+{
+ if (ols->st_stateowner->so_is_open_owner &&
+ !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+ return nfserr_bad_stateid;
+ return nfs_ok;
+}
+
static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
{
struct nfs4_stid *s;
- struct nfs4_ol_stateid *ols;
__be32 status = nfserr_bad_stateid;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- ols = openlockstateid(s);
- if (ols->st_stateowner->so_is_open_owner
- && !(openowner(ols->st_stateowner)->oo_flags
- & NFS4_OO_CONFIRMED))
- status = nfserr_bad_stateid;
- else
- status = nfs_ok;
+ status = nfsd4_check_openowner_confirmed(openlockstateid(s));
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
@@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
status = nfs4_check_fh(current_fh, stp);
if (status)
goto out;
- if (stp->st_stateowner->so_is_open_owner
- && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+ status = nfsd4_check_openowner_confirmed(stp);
+ if (status)
goto out;
status = nfs4_check_openmode(stp, flags);
if (status)
@@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
- nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
- stp->st_stid.sc_file);
-
nfsd4_close_open_stateid(stp);
/* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4f3bfeb11766..dbc4f85a5008 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,12 +63,12 @@ typedef struct {
struct nfsd4_callback {
struct nfs4_client *cb_clp;
- struct list_head cb_per_client;
u32 cb_minorversion;
struct rpc_message cb_msg;
struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
- bool cb_done;
+ int cb_status;
+ bool cb_need_restart;
};
struct nfsd4_callback_ops {
@@ -126,6 +126,7 @@ struct nfs4_delegation {
struct list_head dl_perfile;
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
+ struct nfs4_clnt_odstate *dl_clnt_odstate;
u32 dl_type;
time_t dl_time;
/* For recall: */
@@ -332,7 +333,6 @@ struct nfs4_client {
int cl_cb_state;
struct nfsd4_callback cl_cb_null;
struct nfsd4_session *cl_cb_session;
- struct list_head cl_callbacks; /* list of in-progress callbacks */
/* for all client information that callback code might need: */
spinlock_t cl_lock;
@@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
}
/*
+ * Per-client state indicating no. of opens and outstanding delegations
+ * on a file from a particular client.'od' stands for 'open & delegation'
+ */
+struct nfs4_clnt_odstate {
+ struct nfs4_client *co_client;
+ struct nfs4_file *co_file;
+ struct list_head co_perfile;
+ atomic_t co_odcount;
+};
+
+/*
* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
*
* These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +496,7 @@ struct nfs4_file {
struct list_head fi_delegations;
struct rcu_head fi_rcu;
};
+ struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3];
/*
@@ -526,6 +538,7 @@ struct nfs4_ol_stateid {
struct list_head st_perstateowner;
struct list_head st_locks;
struct nfs4_stateowner * st_stateowner;
+ struct nfs4_clnt_odstate * st_clnt_odstate;
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid * st_openstp;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f982ae84f0cd..2f8c092be2b3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -247,6 +247,7 @@ struct nfsd4_open {
struct nfs4_openowner *op_openowner; /* used during processing */
struct nfs4_file *op_file; /* used during processing */
struct nfs4_ol_stateid *op_stp; /* used during processing */
+ struct nfs4_clnt_odstate *op_odstate; /* used during processing */
struct nfs4_acl *op_acl;
struct xdr_netobj op_label;
};
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 059f37137f9a..919fd5bb14a8 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
nchildren = nilfs_btree_node_get_nchildren(node);
if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
- level > NILFS_BTREE_LEVEL_MAX ||
+ level >= NILFS_BTREE_LEVEL_MAX ||
nchildren < 0 ||
nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..fdf4b41d0609 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -757,6 +757,19 @@ lookup:
if (tmpres) {
spin_unlock(&dlm->spinlock);
spin_lock(&tmpres->spinlock);
+
+ /*
+ * Right after dlm spinlock was released, dlm_thread could have
+ * purged the lockres. Check if lockres got unhashed. If so
+ * start over.
+ */
+ if (hlist_unhashed(&tmpres->hash_node)) {
+ spin_unlock(&tmpres->spinlock);
+ dlm_lockres_put(tmpres);
+ tmpres = NULL;
+ goto lookup;
+ }
+
/* Wait on the thread that is mastering the resource */
if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
__dlm_wait_on_lockres(tmpres);
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index 082234581d05..83f4e76511c2 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb,
goto out;
found:
- *return_block = i * bits_per_entry + bit;
+ *return_block = (u64) i * bits_per_entry + bit;
*return_size = run;
ret = set_run(sb, i, bits_per_entry, bit, run, 1);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 138321b0c6c2..3d935c81789a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = {
*/
static int omfs_get_imap(struct super_block *sb)
{
- unsigned int bitmap_size, count, array_size;
+ unsigned int bitmap_size, array_size;
+ int count;
struct omfs_sb_info *sbi = OMFS_SB(sb);
struct buffer_head *bh;
unsigned long **ptr;
@@ -359,7 +360,7 @@ nomem:
}
enum {
- Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
+ Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
};
static const match_table_t tokens = {
@@ -368,6 +369,7 @@ static const match_table_t tokens = {
{Opt_umask, "umask=%o"},
{Opt_dmask, "dmask=%o"},
{Opt_fmask, "fmask=%o"},
+ {Opt_err, NULL},
};
static int parse_options(char *options, struct omfs_sb_info *sbi)
@@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_root = d_make_root(root);
- if (!sb->s_root)
+ if (!sb->s_root) {
+ ret = -ENOMEM;
goto out_brelse_bh2;
+ }
printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
ret = 0;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 24f640441bd9..84d693d37428 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct cred *override_cred;
char *link = NULL;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index d139405d2bfa..692ceda3bc21 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct kstat stat;
int err;
+ if (WARN_ON(!workdir))
+ return ERR_PTR(-EROFS);
+
err = ovl_lock_rename_workdir(workdir, upperdir);
if (err)
goto out;
@@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry;
int err;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
err = ovl_lock_rename_workdir(workdir, upperdir);
if (err)
goto out;
@@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
struct dentry *opaquedir = NULL;
int err;
- if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
- opaquedir = ovl_check_empty_and_clear(dentry);
- err = PTR_ERR(opaquedir);
- if (IS_ERR(opaquedir))
- goto out;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
+ if (is_dir) {
+ if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
+ opaquedir = ovl_check_empty_and_clear(dentry);
+ err = PTR_ERR(opaquedir);
+ if (IS_ERR(opaquedir))
+ goto out;
+ } else {
+ LIST_HEAD(list);
+
+ /*
+ * When removing an empty opaque directory, then it
+ * makes no sense to replace it with an exact replica of
+ * itself. But emptiness still needs to be checked.
+ */
+ err = ovl_check_empty_dir(dentry, &list);
+ ovl_cache_free(&list);
+ if (err)
+ goto out;
+ }
}
err = ovl_lock_rename_workdir(workdir, upperdir);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5f0d1993e6e3..bf8537c7f455 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
+ if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
return -EROFS;
return 0;
@@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
err = PTR_ERR(ufs->workdir);
if (IS_ERR(ufs->workdir)) {
- pr_err("overlayfs: failed to create directory %s/%s\n",
- ufs->config.workdir, OVL_WORKDIR_NAME);
- goto out_put_upper_mnt;
+ pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ ufs->config.workdir, OVL_WORKDIR_NAME, -err);
+ sb->s_flags |= MS_RDONLY;
+ ufs->workdir = NULL;
}
}
@@ -997,7 +998,6 @@ out_put_lower_mnt:
kfree(ufs->lower_mnt);
out_put_workdir:
dput(ufs->workdir);
-out_put_upper_mnt:
mntput(ufs->upper_mnt);
out_put_lowerpath:
for (i = 0; i < numlower; i++)
diff --git a/fs/splice.c b/fs/splice.c
index 476024bb6546..bfe62ae40f40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
long ret, bytes;
umode_t i_mode;
size_t len;
- int i, flags;
+ int i, flags, more;
/*
* We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
* Don't block on output, we have to drain the direct pipe.
*/
sd->flags &= ~SPLICE_F_NONBLOCK;
+ more = sd->flags & SPLICE_F_MORE;
while (len) {
size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
sd->total_len = read_len;
/*
+ * If more data is pending, set SPLICE_F_MORE
+ * If this is the last data and SPLICE_F_MORE was not set
+ * initially, clears it.
+ */
+ if (read_len < len)
+ sd->flags |= SPLICE_F_MORE;
+ else if (!more)
+ sd->flags &= ~SPLICE_F_MORE;
+ /*
* NOTE: nonblocking mode only applies to the input. We
* must not do the output in nonblocking mode as then we
* could get stuck data in the internal pipe:
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 04e79d57bca6..e9d401ce93bb 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
* After the last attribute is removed revert to original inode format,
* making all literal area available to the data fork once more.
*/
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
struct xfs_inode *ip,
struct xfs_trans *tp)
{
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
(mp->m_flags & XFS_MOUNT_ATTR2) &&
(dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
!(args->op_flags & XFS_DA_OP_ADDNAME)) {
- xfs_attr_fork_reset(dp, args->trans);
+ xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
if (forkoff == -1) {
ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
- xfs_attr_fork_reset(dp, args->trans);
+ xfs_attr_fork_remove(dp, args->trans);
goto out;
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 025c4b820c03..882c8d338891 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_list(struct xfs_attr_list_context *context);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
/*
* Internal routines when attribute fork size == XFS_LBSIZE(mp).
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index aeffeaaac0ec..f1026e86dabc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
align_alen += temp;
align_off -= temp;
}
+
+ /* Same adjustment for the end of the requested area. */
+ temp = (align_alen % extsz);
+ if (temp)
+ align_alen += extsz - temp;
+
/*
- * Same adjustment for the end of the requested area.
+ * For large extent hint sizes, the aligned extent might be larger than
+ * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+ * the length back under MAXEXTLEN. The outer allocation loops handle
+ * short allocation just fine, so it is safe to do this. We only want to
+ * do it when we are forced to, though, because it means more allocation
+ * operations are required.
*/
- if ((temp = (align_alen % extsz))) {
- align_alen += extsz - temp;
- }
+ while (align_alen > MAXEXTLEN)
+ align_alen -= extsz;
+ ASSERT(align_alen <= MAXEXTLEN);
+
/*
* If the previous block overlaps with this proposed allocation
* then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
return -EINVAL;
} else {
ASSERT(orig_off >= align_off);
- ASSERT(orig_end <= align_off + align_alen);
+ /* see MAXEXTLEN handling above */
+ ASSERT(orig_end <= align_off + align_alen ||
+ align_alen + extsz > MAXEXTLEN);
}
#ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
/* Figure out the extent size, adjust alen */
extsz = xfs_get_extsz_hint(ip);
if (extsz) {
- /*
- * Make sure we don't exceed a single extent length when we
- * align the extent by reducing length we are going to
- * allocate by the maximum amount extent size aligment may
- * require.
- */
- alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 07349a183a11..1c9e75521250 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
*/
newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
- percpu_counter_read(&args.mp->m_icount) + newlen >
+ percpu_counter_read_positive(&args.mp->m_icount) + newlen >
args.mp->m_maxicount)
return -ENOSPC;
args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
* If we have already hit the ceiling of inode blocks then clear
* okalloc so we scan all available agi structures for a free
* inode.
+ *
+ * Read rough value of mp->m_icount by percpu_counter_read_positive,
+ * which will sacrifice the preciseness but improve the performance.
*/
if (mp->m_maxicount &&
- percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
- mp->m_maxicount) {
+ percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+ > mp->m_maxicount) {
noroom = 1;
okalloc = 0;
}
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index f9c1c64782d3..3fbf167cfb4c 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
return error;
}
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+ struct xfs_inode *dp)
{
- xfs_trans_t *trans;
- xfs_mount_t *mp;
- int error;
+ struct xfs_trans *trans;
+ struct xfs_mount *mp;
+ int cancel_flags = 0;
+ int lock_mode = XFS_ILOCK_SHARED;
+ int error = 0;
mp = dp->i_mount;
ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
- xfs_ilock(dp, XFS_ILOCK_SHARED);
- if (!xfs_inode_hasattr(dp) ||
- dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return 0;
- }
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
+ xfs_ilock(dp, lock_mode);
+ if (!XFS_IFORK_Q(dp))
+ goto out_destroy_fork;
+ xfs_iunlock(dp, lock_mode);
/*
* Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
* the inode in every transaction to let it float upward through
* the log.
*/
+ lock_mode = 0;
trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
- if (error) {
- xfs_trans_cancel(trans, 0);
- return error;
- }
- xfs_ilock(dp, XFS_ILOCK_EXCL);
+ if (error)
+ goto out_cancel;
+
+ lock_mode = XFS_ILOCK_EXCL;
+ cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+ xfs_ilock(dp, lock_mode);
+
+ if (!XFS_IFORK_Q(dp))
+ goto out_cancel;
/*
* No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
*/
xfs_trans_ijoin(trans, dp, 0);
- /*
- * Decide on what work routines to call based on the inode size.
- */
- if (!xfs_inode_hasattr(dp) ||
- dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- error = 0;
- goto out;
+ /* invalidate and truncate the attribute fork extents */
+ if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ error = xfs_attr3_root_inactive(&trans, dp);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+ if (error)
+ goto out_cancel;
}
- error = xfs_attr3_root_inactive(&trans, dp);
- if (error)
- goto out;
- error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
- if (error)
- goto out;
+ /* Reset the attribute fork - this also destroys the in-core fork */
+ xfs_attr_fork_remove(dp, trans);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+ xfs_iunlock(dp, lock_mode);
return error;
-out:
- xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+ xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+ /* kill the in-core attr fork before we drop the inode lock */
+ if (dp->i_afp)
+ xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+ if (lock_mode)
+ xfs_iunlock(dp, lock_mode);
return error;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8121e75352ee..3b7591224f4a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -124,7 +124,7 @@ xfs_iozero(
status = 0;
} while (count);
- return (-status);
+ return status;
}
int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d6ebc85192b7..539a85fddbc2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1946,21 +1946,17 @@ xfs_inactive(
/*
* If there are attributes associated with the file then blow them away
* now. The code calls a routine that recursively deconstructs the
- * attribute fork. We need to just commit the current transaction
- * because we can't use it for xfs_attr_inactive().
+ * attribute fork. If also blows away the in-core attribute fork.
*/
- if (ip->i_d.di_anextents > 0) {
- ASSERT(ip->i_d.di_forkoff != 0);
-
+ if (XFS_IFORK_Q(ip)) {
error = xfs_attr_inactive(ip);
if (error)
return;
}
- if (ip->i_afp)
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+ ASSERT(!ip->i_afp);
ASSERT(ip->i_d.di_anextents == 0);
+ ASSERT(ip->i_d.di_forkoff == 0);
/*
* Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
if (error)
return error;
- /* Satisfy xfs_bumplink that this is a real tmpfile */
+ /*
+ * Prepare the tmpfile inode as if it were created through the VFS.
+ * Otherwise, the link increment paths will complain about nlink 0->1.
+ * Drop the link count as done by d_tmpfile(), complete the inode setup
+ * and flag it as linkable.
+ */
+ drop_nlink(VFS_I(tmpfile));
xfs_finish_inode_setup(tmpfile);
VFS_I(tmpfile)->i_state |= I_LINKABLE;
@@ -3151,7 +3153,7 @@ xfs_rename(
* intermediate state on disk.
*/
if (wip) {
- ASSERT(wip->i_d.di_nlink == 0);
+ ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
error = xfs_bumplink(tp, wip);
if (error)
goto out_trans_abort;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2ce7ee3b4ec1..6f23fbdfb365 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
return xfs_sync_sb(mp, true);
}
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH 128
int
xfs_mod_icount(
struct xfs_mount *mp,
int64_t delta)
{
- /* deltas are +/-64, hence the large batch size of 128. */
- __percpu_counter_add(&mp->m_icount, delta, 128);
- if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+ __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+ if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
ASSERT(0);
percpu_counter_add(&mp->m_icount, -delta);
return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
return 0;
}
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH 1024
int
xfs_mod_fdblocks(
struct xfs_mount *mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
* Taking blocks away, need to be more accurate the closer we
* are to zero.
*
- * batch size is set to a maximum of 1024 blocks - if we are
- * allocating of freeing extents larger than this then we aren't
- * going to be hammering the counter lock so a lock per update
- * is not a problem.
- *
* If the counter has a value of less than 2 * max batch size,
* then make everything serialise as we are real close to
* ENOSPC.
*/
-#define __BATCH 1024
- if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+ if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+ XFS_FDBLOCKS_BATCH) < 0)
batch = 1;
else
- batch = __BATCH;
-#undef __BATCH
+ batch = XFS_FDBLOCKS_BATCH;
__percpu_counter_add(&mp->m_fdblocks, delta, batch);
- if (percpu_counter_compare(&mp->m_fdblocks,
- XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+ if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+ XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */
return 0;
}
OpenPOWER on IntegriCloud