summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/qgroup.c
diff options
context:
space:
mode:
authorQu Wenruo <quwenruo@cn.fujitsu.com>2017-02-27 15:10:39 +0800
committerDavid Sterba <dsterba@suse.com>2017-06-29 20:17:02 +0200
commitbc42bda22345efdb5d8b578d1b4df2c6eaa85c58 (patch)
tree825e007666671ac31bd140e61b48540178cbb49e /fs/btrfs/qgroup.c
parent364ecf3651e0862152c8b340d7cb3021dc0122c7 (diff)
downloadtalos-obmc-linux-bc42bda22345efdb5d8b578d1b4df2c6eaa85c58.tar.gz
talos-obmc-linux-bc42bda22345efdb5d8b578d1b4df2c6eaa85c58.zip
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG] For the following case, btrfs can underflow qgroup reserved space at an error path: (Page size 4K, function name without "btrfs_" prefix) Task A | Task B ---------------------------------------------------------------------- Buffered_write [0, 2K) | |- check_data_free_space() | | |- qgroup_reserve_data() | | Range aligned to page | | range [0, 4K) <<< | | 4K bytes reserved <<< | |- copy pages to page cache | | Buffered_write [2K, 4K) | |- check_data_free_space() | | |- qgroup_reserved_data() | | Range alinged to page | | range [0, 4K) | | Already reserved by A <<< | | 0 bytes reserved <<< | |- delalloc_reserve_metadata() | | And it *FAILED* (Maybe EQUOTA) | |- free_reserved_data_space() |- qgroup_free_data() Range aligned to page range [0, 4K) Freeing 4K (Special thanks to Chandan for the detailed report and analyse) [CAUSE] Above Task B is freeing reserved data range [0, 4K) which is actually reserved by Task A. And at writeback time, page dirty by Task A will go through writeback routine, which will free 4K reserved data space at file extent insert time, causing the qgroup underflow. [FIX] For btrfs_qgroup_free_data(), add @reserved parameter to only free data ranges reserved by previous btrfs_qgroup_reserve_data(). So in above case, Task B will try to free 0 byte, so no underflow. Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com> Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r--fs/btrfs/qgroup.c72
1 files changed, 67 insertions, 5 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 002088937021..fc9dffaa9524 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2892,13 +2892,72 @@ cleanup:
return ret;
}
-static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
- int free)
+/* Free ranges specified by @reserved, normally in error path */
+static int qgroup_free_reserved_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct extent_changeset changeset;
+ int freed = 0;
+ int ret;
+
+ extent_changeset_init(&changeset);
+ len = round_up(start + len, root->fs_info->sectorsize);
+ start = round_down(start, root->fs_info->sectorsize);
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
+ u64 range_start = unode->val;
+ /* unode->aux is the inclusive end */
+ u64 range_len = unode->aux - range_start + 1;
+ u64 free_start;
+ u64 free_len;
+
+ extent_changeset_release(&changeset);
+
+ /* Only free range in range [start, start + len) */
+ if (range_start >= start + len ||
+ range_start + range_len <= start)
+ continue;
+ free_start = max(range_start, start);
+ free_len = min(start + len, range_start + range_len) -
+ free_start;
+ /*
+ * TODO: To also modify reserved->ranges_reserved to reflect
+ * the modification.
+ *
+ * However as long as we free qgroup reserved according to
+ * EXTENT_QGROUP_RESERVED, we won't double free.
+ * So not need to rush.
+ */
+ ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
+ free_start, free_start + free_len - 1,
+ EXTENT_QGROUP_RESERVED, &changeset);
+ if (ret < 0)
+ goto out;
+ freed += changeset.bytes_changed;
+ }
+ btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
+ ret = freed;
+out:
+ extent_changeset_release(&changeset);
+ return ret;
+}
+
+static int __btrfs_qgroup_release_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len,
+ int free)
{
struct extent_changeset changeset;
int trace_op = QGROUP_RELEASE;
int ret;
+ /* In release case, we shouldn't have @reserved */
+ WARN_ON(!free && reserved);
+ if (free && reserved)
+ return qgroup_free_reserved_data(inode, reserved, start, len);
extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
@@ -2924,14 +2983,17 @@ out:
*
* Should be called when a range of pages get invalidated before reaching disk.
* Or for error cleanup case.
+ * if @reserved is given, only reserved range in [@start, @start + @len) will
+ * be freed.
*
* For data written to disk, use btrfs_qgroup_release_data().
*
* NOTE: This function may sleep for memory allocation.
*/
-int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_free_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
- return __btrfs_qgroup_release_data(inode, start, len, 1);
+ return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}
/*
@@ -2951,7 +3013,7 @@ int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
*/
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
- return __btrfs_qgroup_release_data(inode, start, len, 0);
+ return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
OpenPOWER on IntegriCloud