1 files changed, 14 insertions, 37 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a24a0ba523d6..003a826f4cff 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 
 	INIT_LIST_HEAD(&extents);
 
+	down_write(&BTRFS_I(inode)->dio_sem);
 	write_lock(&tree->lock);
 	test_gen = root->fs_info->last_trans_committed;
 
@@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 	/*
-	 * Collect any new ordered extents within the range. This is to
-	 * prevent logging file extent items without waiting for the disk
-	 * location they point to being written. We do this only to deal
-	 * with races against concurrent lockless direct IO writes.
+	 * Some ordered extents started by fsync might have completed
+	 * before we could collect them into the list logged_list, which
+	 * means they're gone, not in our logged_list nor in the inode's
+	 * ordered tree. We want the application/user space to know an
+	 * error happened while attempting to persist file data so that
+	 * it can take proper action. If such an error happened, we leave
+	 * without writing to the log tree and the fsync must report the
+	 * file data write error and not commit the current transaction.
 	 */
-	btrfs_get_logged_extents(inode, logged_list, start, end);
+	ret = btrfs_inode_check_errors(inode);
+	if (ret)
+		ctx->io_err = ret;
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4202,6 +4210,7 @@ process:
 	}
 	WARN_ON(!list_empty(&extents));
 	write_unlock(&tree->lock);
+	up_write(&BTRFS_I(inode)->dio_sem);
 
 	btrfs_release_path(path);
 	return ret;
@@ -4623,23 +4632,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	mutex_lock(&BTRFS_I(inode)->log_mutex);
 
 	/*
-	 * Collect ordered extents only if we are logging data. This is to
-	 * ensure a subsequent request to log this inode in LOG_INODE_ALL mode
-	 * will process the ordered extents if they still exists at the time,
-	 * because when we collect them we test and set for the flag
-	 * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
-	 * same ordered extents. The consequence for the LOG_INODE_ALL log mode
-	 * not processing the ordered extents is that we end up logging the
-	 * corresponding file extent items, based on the extent maps in the
-	 * inode's extent_map_tree's modified_list, without logging the
-	 * respective checksums (since the may still be only attached to the
-	 * ordered extents and have not been inserted in the csum tree by
-	 * btrfs_finish_ordered_io() yet).
-	 */
-	if (inode_only == LOG_INODE_ALL)
-		btrfs_get_logged_extents(inode, &logged_list, start, end);
-
-	/*
 	 * a brute force approach to making sure we get the most uptodate
 	 * copies of everything.
 	 */
@@ -4846,21 +4838,6 @@ log_extents:
 			goto out_unlock;
 	}
 	if (fast_search) {
-		/*
-		 * Some ordered extents started by fsync might have completed
-		 * before we collected the ordered extents in logged_list, which
-		 * means they're gone, not in our logged_list nor in the inode's
-		 * ordered tree. We want the application/user space to know an
-		 * error happened while attempting to persist file data so that
-		 * it can take proper action. If such error happened, we leave
-		 * without writing to the log tree and the fsync must report the
-		 * file data write error and not commit the current transaction.
-		 */
-		err = btrfs_inode_check_errors(inode);
-		if (err) {
-			ctx->io_err = err;
-			goto out_unlock;
-		}
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
 						&logged_list, ctx, start, end);
 		if (ret) {