summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c18
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/dax.c14
-rw-r--r--fs/namei.c7
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c44
-rw-r--r--fs/xfs/xfs_file.c21
-rw-r--r--fs/xfs/xfs_log_recover.c11
9 files changed, 98 insertions, 25 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 862fbc206755..564a7de17d99 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -378,7 +378,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
if (ret)
- btrfs_error(root->fs_info, ret, "kobj add dev failed");
+ btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a9aadb2ad525..f556c3732c2c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2842,6 +2842,7 @@ int open_ctree(struct super_block *sb,
!extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
+ chunk_root->node = NULL;
goto fail_tree_roots;
}
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
@@ -2879,7 +2880,7 @@ retry_root_backup:
!extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
-
+ tree_root->node = NULL;
goto recovery_tree_root;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 171312d51799..07204bf601ed 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4227,6 +4227,24 @@ out:
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
+ /*
+ * When we allocate a new chunk we reserve space in the chunk block
+ * reserve to make sure we can COW nodes/leafs in the chunk tree or
+ * add new nodes/leafs to it if we end up needing to do it when
+ * inserting the chunk item and updating device items as part of the
+ * second phase of chunk allocation, performed by
+ * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
+ * large number of new block groups to create in our transaction
+ * handle's new_bgs list to avoid exhausting the chunk block reserve
+ * in extreme cases - like having a single transaction create many new
+ * block groups when starting to write out the free space caches of all
+ * the block groups that were made dirty during the lifetime of the
+ * transaction.
+ */
+ if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
+ btrfs_create_pending_block_groups(trans, trans->root);
+ btrfs_trans_release_chunk_metadata(trans);
+ }
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51e0f0d0053e..f5021fcb154e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2152,7 +2152,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
- if (current != root->fs_info->transaction_kthread)
+ if (current != root->fs_info->transaction_kthread &&
+ current != root->fs_info->cleaner_kthread)
btrfs_run_delayed_iputs(root);
return ret;
diff --git a/fs/dax.c b/fs/dax.c
index c3e21ccfc358..a7f77e1fa18c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
* @get_block: The filesystem method used to translate file offsets to blocks
+ * @complete_unwritten: The filesystem method used to convert unwritten blocks
+ * to written so the data written to them is exposed. This is required for
+ * required by write faults for filesystems that will return unwritten
+ * extent mappings from @get_block, but it is optional for reads as
+ * dax_insert_mapping() will always zero unwritten blocks. If the fs does
+ * not support unwritten extents, the it should pass NULL.
*
* When a page fault occurs, filesystems may call this helper in their
* fault handler for DAX files. __dax_fault() assumes the caller has done all
@@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
* as for normal BH based IO completions.
*/
error = dax_insert_mapping(inode, &bh, vma, vmf);
- if (buffer_unwritten(&bh))
- complete_unwritten(&bh, !error);
+ if (buffer_unwritten(&bh)) {
+ if (complete_unwritten)
+ complete_unwritten(&bh, !error);
+ else
+ WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
+ }
out:
if (error == -ENOMEM)
diff --git a/fs/namei.c b/fs/namei.c
index ae4e4c18b2ac..fbbcf0993312 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1954,8 +1954,13 @@ OK:
continue;
}
}
- if (unlikely(!d_can_lookup(nd->path.dentry)))
+ if (unlikely(!d_can_lookup(nd->path.dentry))) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlazy_walk(nd, NULL, 0))
+ return -ECHILD;
+ }
return -ENOTDIR;
+ }
}
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 20de88d1bf86..dd714037c322 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
ASSERT(len >= blksize);
while (len > 0) {
+ struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
- if (bip) {
- struct xfs_attr3_rmt_hdr *rmt;
- rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ /*
+ * Ensure we aren't writing bogus LSNs to disk. See
+ * xfs_attr3_rmt_hdr_set() for the explanation.
+ */
+ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
rmt->rm_owner = cpu_to_be64(ino);
rmt->rm_blkno = cpu_to_be64(bno);
+ /*
+ * Remote attribute blocks are written synchronously, so we don't
+ * have an LSN that we can stamp in them that makes any sense to log
+ * recovery. To ensure that log recovery handles overwrites of these
+ * blocks sanely (i.e. once they've been freed and reallocated as some
+ * other type of metadata) we need to ensure that the LSN has a value
+ * that tells log recovery to ignore the LSN and overwrite the buffer
+ * with whatever is in it's log. To do this, we use the magic
+ * NULLCOMMITLSN to indicate that the LSN is invalid.
+ */
+ rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
+
return sizeof(struct xfs_attr3_rmt_hdr);
}
@@ -434,14 +451,21 @@ xfs_attr_rmtval_set(
/*
* Allocate a single extent, up to the size of the value.
+ *
+ * Note that we have to consider this a data allocation as we
+ * write the remote attribute without logging the contents.
+ * Hence we must ensure that we aren't using blocks that are on
+ * the busy list so that we don't overwrite blocks which have
+ * recently been freed but their transactions are not yet
+ * committed to disk. If we overwrite the contents of a busy
+ * extent and then crash then the block may not contain the
+ * correct metadata after log recovery occurs.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
- blkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- args->firstblock, args->total, &map, &nmap,
- args->flist);
+ blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
+ args->total, &map, &nmap, args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f0e8249722d4..db4acc1c3e73 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1514,18 +1514,27 @@ xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
+ struct inode *inode = file_inode(vma->vm_file);
int ret;
- trace_xfs_filemap_fault(ip);
+ trace_xfs_filemap_fault(XFS_I(inode));
/* DAX can shortcut the normal fault path on write faults! */
- if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
+ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
return xfs_filemap_page_mkwrite(vma, vmf);
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- ret = filemap_fault(vma, vmf);
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ if (IS_DAX(inode)) {
+ /*
+ * we do not want to trigger unwritten extent conversion on read
+ * faults - that is unnecessary overhead and would also require
+ * changes to xfs_get_blocks_direct() to map unwritten extent
+ * ioend for conversion on read-only mappings.
+ */
+ ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
+ } else
+ ret = filemap_fault(vma, vmf);
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 01dd228ca05e..480ebba8464f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn(
uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
break;
case XFS_ATTR3_RMT_MAGIC:
- lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
- uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
- break;
+ /*
+ * Remote attr blocks are written synchronously, rather than
+ * being logged. That means they do not contain a valid LSN
+ * (i.e. transactionally ordered) in them, and hence any time we
+ * see a buffer to replay over the top of a remote attribute
+ * block we should simply do so.
+ */
+ goto recover_immediately;
case XFS_SB_MAGIC:
lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
uuid = &((struct xfs_dsb *)blk)->sb_uuid;
OpenPOWER on IntegriCloud