From 57ba4cb85bffc0c7c6567c89d23713721fea9655 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 20 May 2016 04:34:23 +0100
Subject: Btrfs: fix race between device replace and block group removal

When it's finishing, the device replace code iterates all extent maps
representing block groups and for each one that has a stripe that refers
to the source device, it replaces its device with the target device.
However, when it replaces the source device with the target device, the
target device still has an ID of 0ULL (BTRFS_DEV_REPLACE_DEVID); only
afterwards is its ID changed to match the one from the source device.
This leads to races with the chunk removal code that can temporarily see
a device with an ID of 0ULL and then attempt to use that ID to remove
items from the device tree and fail, causing a transaction abort:

[ 9238.594364] BTRFS info (device sdf): dev_replace from /dev/sdf (devid 3) to /dev/sde finished
[ 9238.594377] ------------[ cut here ]------------
[ 9238.594402] WARNING: CPU: 14 PID: 21566 at fs/btrfs/volumes.c:2771 btrfs_remove_chunk+0x2e5/0x793 [btrfs]
[ 9238.594403] BTRFS: Transaction aborted (error 1)
[ 9238.594416] Modules linked in: btrfs crc32c_generic acpi_cpufreq xor tpm_tis tpm raid6_pq ppdev parport_pc processor psmouse parport i2c_piix4 evdev sg i2c_core se
rio_raw pcspkr button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio e1000 scsi_mod fl
oppy [last unloaded: btrfs]
[ 9238.594418] CPU: 14 PID: 21566 Comm: btrfs-cleaner Not tainted 4.6.0-rc7-btrfs-next-29+ #1
[ 9238.594419] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
[ 9238.594421]  0000000000000000 ffff88017f1dbc60 ffffffff8126b42c ffff88017f1dbcb0
[ 9238.594422]  0000000000000000 ffff88017f1dbca0 ffffffff81052b14 00000ad37f1dbd18
[ 9238.594423]  0000000000000001 ffff88018068a558 ffff88005c4b9c00 ffff880233f60db0
[ 9238.594424] Call Trace:
[ 9238.594428]  [<ffffffff8126b42c>] dump_stack+0x67/0x90
[ 9238.594430]  [<ffffffff81052b14>] __warn+0xc2/0xdd
[ 9238.594432]  [<ffffffff81052b7a>] warn_slowpath_fmt+0x4b/0x53
[ 9238.594434]  [<ffffffff8116c311>] ? kmem_cache_free+0x128/0x188
[ 9238.594450]  [<ffffffffa04d43f5>] btrfs_remove_chunk+0x2e5/0x793 [btrfs]
[ 9238.594452]  [<ffffffff8108e456>] ? arch_local_irq_save+0x9/0xc
[ 9238.594464]  [<ffffffffa04a26fa>] btrfs_delete_unused_bgs+0x317/0x382 [btrfs]
[ 9238.594476]  [<ffffffffa04a961d>] cleaner_kthread+0x1ad/0x1c7 [btrfs]
[ 9238.594489]  [<ffffffffa04a9470>] ? btree_invalidatepage+0x8e/0x8e [btrfs]
[ 9238.594490]  [<ffffffff8106f403>] kthread+0xd4/0xdc
[ 9238.594494]  [<ffffffff8149e242>] ret_from_fork+0x22/0x40
[ 9238.594495]  [<ffffffff8106f32f>] ? kthread_stop+0x286/0x286
[ 9238.594496] ---[ end trace 183efbe50275f059 ]---

The sequence of steps leading to this is like the following:

              CPU 1                                           CPU 2

 btrfs_dev_replace_finishing()

   at this point
   dev_replace->tgtdev->devid ==
   BTRFS_DEV_REPLACE_DEVID (0ULL)

   ...

   btrfs_start_transaction()
   btrfs_commit_transaction()

                                                     btrfs_delete_unused_bgs()
                                                       btrfs_remove_chunk()

                                                         looks up for the extent map
                                                         corresponding to the chunk

                                                         lock_chunks() (chunk_mutex)
                                                         check_system_chunk()
                                                         unlock_chunks() (chunk_mutex)

   locks fs_info->chunk_mutex

   btrfs_dev_replace_update_device_in_mapping_tree()
     --> iterates fs_info->mapping_tree and
         replaces the device in every extent
         map's map->stripes[] with
         dev_replace->tgtdev, which still has
         an id of 0ULL (BTRFS_DEV_REPLACE_DEVID)

                                                         iterates over all stripes from
                                                         the extent map

                                                           --> calls btrfs_free_dev_extent()
                                                               passing it the target device
                                                               that still has an ID of 0ULL

                                                           --> btrfs_free_dev_extent() fails
                                                             --> aborts current transaction

   finishes setting up the target device,
   namely it sets tgtdev->devid to the value
   of srcdev->devid (which is necessarily > 0)

   frees the srcdev

   unlocks fs_info->chunk_mutex

So fix this by taking the device list mutex while processing the stripes
for the chunk's extent map. This is similar to the race between device
replace and block group creation that was fixed by commit 50460e37186a
("Btrfs: fix race when finishing dev replace leading to transaction abort").

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/volumes.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs/btrfs/volumes.c')
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9c01824eef08..04ca48362ef1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	u64 dev_extent_len = 0;
 	u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	int i, ret = 0;
+	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 
 	/* Just in case */
 	root = root->fs_info->chunk_root;
@@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	check_system_chunk(trans, extent_root, map->type);
 	unlock_chunks(root->fs_info->chunk_root);
 
+	/*
+	 * Take the device list mutex to prevent races with the final phase of
+	 * a device replace operation that replaces the device object associated
+	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
+	 */
+	mutex_lock(&fs_devices->device_list_mutex);
 	for (i = 0; i < map->num_stripes; i++) {
 		struct btrfs_device *device = map->stripes[i].dev;
 		ret = btrfs_free_dev_extent(trans, device,
 					    map->stripes[i].physical,
 					    &dev_extent_len);
 		if (ret) {
+			mutex_unlock(&fs_devices->device_list_mutex);
 			btrfs_abort_transaction(trans, root, ret);
 			goto out;
 		}
@@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 		if (map->stripes[i].dev) {
 			ret = btrfs_update_device(trans, map->stripes[i].dev);
 			if (ret) {
+				mutex_unlock(&fs_devices->device_list_mutex);
 				btrfs_abort_transaction(trans, root, ret);
 				goto out;
 			}
 		}
 	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
 	ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
 	if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
-- 
cgit v1.2.1


From 22ab04e814f4fe2ce72a13d291491f98ef6ac757 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 18 May 2016 20:29:44 +0100
Subject: Btrfs: fix race between device replace and chunk allocation

While iterating and copying extents from the source device, the device
replace code keeps adjusting a left cursor that is used to make sure that
once we finish processing a device extent, any future writes to extents
from the corresponding block group will get into both the source and
target devices. This left cursor is also used for resuming the device
replace operation at mount time.

However, using this left cursor to decide whether writes go into both
devices or only the source device is not enough to guarantee we don't
miss copying extents into the target device. There are two cases where
the current approach fails. The first one is related to when there are
holes in the device and they get allocated for new block groups while
the device replace operation is iterating the device extents (more on
this explained below). The second one is that when the loop over the
device extents finishes and we start delalloc, wait for all ordered
extents and then commit the current transaction, we might have got new
block groups allocated that are now using a device extent that has an
offset greater than or equal to the value of the left cursor, in which
case writes to extents belonging to these new block groups will get
issued only to the source device.

For the first case where the current approach of using a left cursor
fails, consider the source device currently has the following layout:

  [ extent bg A ] [ hole, unallocated space ] [extent bg B ]
  3Gb             4Gb                         5Gb

While we are iterating the device extents from the source device using
the commit root of the device tree, the following happens:

        CPU 1                                            CPU 2

                      <we are at transaction N>

  scrub_enumerate_chunks()
    --> searches the device tree for
        extents belonging to the source
        device using the device tree's
        commit root
    --> 1st iteration finds extent belonging to
        block group A

        --> sets block group A to RO mode
            (btrfs_inc_block_group_ro)

        --> sets cursor left to found_key.offset
            which is 3Gb

        --> scrub_chunk() starts
            copies all allocated extents from
            block group's A stripe at source
            device into target device

                                                           btrfs_alloc_chunk()
                                                             --> allocates device extent
                                                                 in the range [4Gb, 5Gb[
                                                                 from the source device for
                                                                 a new block group C

                                                           extent allocated from block
                                                           group C for a direct IO,
                                                           buffered write or btree node/leaf

                                                           extent is written to, perhaps
                                                           in response to a writepages()
                                                           call from the VM or directly
                                                           through direct IO

                                                           the write is made only against
                                                           the source device and not against
                                                           the target device because the
                                                           extent's offset is in the interval
                                                           [4Gb, 5Gb[ which is larger than
                                                           the value of cursor_left (3Gb)

        --> scrub_chunk() finishes

        --> updates left cursor from 3Gb to
            4Gb

        --> btrfs_dec_block_group_ro() sets
            block group A back to RW mode

                             <we are still at transaction N>

    --> 2nd iteration finds extent belonging to
        block group B - it did not find the new
        extent in the range [4Gb, 5Gb[ for block
        group C because we are using the device
        tree's commit root or even because the
        block group's items are not all yet
        inserted in the respective btrees, that is,
        the block group is still attached to some
        transaction handle's new_bgs list and
        btrfs_create_pending_block_groups() was
        not called yet against that transaction
        handle, so the device extent items were
        not yet inserted into the devices tree

                             <we are still at transaction N>

        --> so we end up not copying anything from the newly
            allocated device extent from the source device
            to the target device

So fix this by making __btrfs_map_block() always redirect writes to the
target device as well, independently of the left cursor's value. With
this change the left cursor is now used only for the purpose of tracking
progress and allowing a mount operation to resume a device replace.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/volumes.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'fs/btrfs/volumes.c')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 04ca48362ef1..765aabd9145f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5773,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			}
 		}
 		if (found) {
-			if (physical_of_found + map->stripe_len <=
-			    dev_replace->cursor_left) {
-				struct btrfs_bio_stripe *tgtdev_stripe =
-					bbio->stripes + num_stripes;
+			struct btrfs_bio_stripe *tgtdev_stripe =
+				bbio->stripes + num_stripes;
 
-				tgtdev_stripe->physical = physical_of_found;
-				tgtdev_stripe->length =
-					bbio->stripes[index_srcdev].length;
-				tgtdev_stripe->dev = dev_replace->tgtdev;
-				bbio->tgtdev_map[index_srcdev] = num_stripes;
+			tgtdev_stripe->physical = physical_of_found;
+			tgtdev_stripe->length =
+				bbio->stripes[index_srcdev].length;
+			tgtdev_stripe->dev = dev_replace->tgtdev;
+			bbio->tgtdev_map[index_srcdev] = num_stripes;
 
-				tgtdev_indexes++;
-				num_stripes++;
-			}
+			tgtdev_indexes++;
+			num_stripes++;
 		}
 	}
 
-- 
cgit v1.2.1


From 65d4f4c151a5fa7b2dacaaf70def3f95001766d7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 Sep 2015 15:00:37 -0400
Subject: Btrfs: end transaction if we abort when creating uuid root

We still need to call btrfs_end_transaction if we call btrfs_abort_transaction,
otherwise we hang and make me super grumpy.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/btrfs/volumes.c')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9c01824eef08..673c72ab4fbe 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4230,6 +4230,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
 	if (IS_ERR(uuid_root)) {
 		ret = PTR_ERR(uuid_root);
 		btrfs_abort_transaction(trans, tree_root, ret);
+		btrfs_end_transaction(trans, tree_root);
 		return ret;
 	}
 
-- 
cgit v1.2.1


From d865177a5e749827f248f6363f5100d3a2f66b0f Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 3 Jun 2016 17:41:42 -0700
Subject: Btrfs: clear uptodate flags of pages in sys_array eb

We set the uptodate flag on pages in the temporary sys_array eb,
but do not clear the flag after freeing the eb.  As the special
btree inode may still hold a reference on those pages, the
uptodate flag can remain set on them.

If btrfs_super_chunk_root has been intentionally changed to the
offset of this sys_array eb, reading chunk_root will read content
of sys_array and it will skip our beautiful checks in
btree_readpage_end_io_hook() because of
"pages of eb are uptodate => eb is uptodate"

This adds the 'clear uptodate' part to force it to read from disk.

Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs/volumes.c')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 673c72ab4fbe..42ccde43053b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6631,12 +6631,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 		sb_array_offset += len;
 		cur_offset += len;
 	}
+	clear_extent_buffer_uptodate(sb);
 	free_extent_buffer_stale(sb);
 	return ret;
 
 out_short_read:
 	printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
 			len, cur_offset);
+	clear_extent_buffer_uptodate(sb);
 	free_extent_buffer_stale(sb);
 	return -EIO;
 }
-- 
cgit v1.2.1


From 99e3ecfcb9f4ca35192d20a5bea158b81f600062 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 3 Jun 2016 12:05:14 -0700
Subject: Btrfs: add more validation checks for superblock

This adds validation checks for super_total_bytes, super_bytes_used,
super_stripesize and super_num_devices.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'fs/btrfs/volumes.c')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 42ccde43053b..fd5c9e69894a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6651,6 +6651,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 	struct btrfs_key found_key;
 	int ret;
 	int slot;
+	u64 total_dev = 0;
 
 	root = root->fs_info->chunk_root;
 
@@ -6692,6 +6693,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 			ret = read_one_dev(root, leaf, dev_item);
 			if (ret)
 				goto error;
+			total_dev++;
 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 			struct btrfs_chunk *chunk;
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6701,6 +6703,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 		}
 		path->slots[0]++;
 	}
+
+	/*
+	 * After loading chunk tree, we've got all device information,
+	 * do another round of validation checks.
+	 */
+	if (total_dev != root->fs_info->fs_devices->total_devices) {
+		btrfs_err(root->fs_info,
+	   "super_num_devices %llu mismatch with num_devices %llu found here",
+			  btrfs_super_num_devices(root->fs_info->super_copy),
+			  total_dev);
+		ret = -EINVAL;
+		goto error;
+	}
+	if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+	    root->fs_info->fs_devices->total_rw_bytes) {
+		btrfs_err(root->fs_info,
+	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+			  btrfs_super_total_bytes(root->fs_info->super_copy),
+			  root->fs_info->fs_devices->total_rw_bytes);
+		ret = -EINVAL;
+		goto error;
+	}
 	ret = 0;
 error:
 	unlock_chunks(root);
-- 
cgit v1.2.1


From e06cd3dd7cea50e87663a88acdfdb7ac1c53a5ca Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 3 Jun 2016 12:05:15 -0700
Subject: Btrfs: add validation checks for chunk loading

To prevent fuzzed filesystem images from panicking the whole system,
we need various validation checks to refuse to mount such an image
if btrfs finds any invalid value during loading chunks, including
both sys_array and regular chunks.

Note that these checks may not be sufficient to cover all corner cases;
feel free to add more checks.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 82 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 67 insertions(+), 15 deletions(-)

(limited to 'fs/btrfs/volumes.c')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fd5c9e69894a..74507b05061b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6251,27 +6251,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 	return dev;
 }
 
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
-			  struct extent_buffer *leaf,
-			  struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+				   struct extent_buffer *leaf,
+				   struct btrfs_chunk *chunk, u64 logical)
 {
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	struct map_lookup *map;
-	struct extent_map *em;
-	u64 logical;
 	u64 length;
 	u64 stripe_len;
-	u64 devid;
-	u8 uuid[BTRFS_UUID_SIZE];
-	int num_stripes;
-	int ret;
-	int i;
+	u16 num_stripes;
+	u16 sub_stripes;
+	u64 type;
 
-	logical = key->offset;
 	length = btrfs_chunk_length(leaf, chunk);
 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
-	/* Validation check */
+	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+	type = btrfs_chunk_type(leaf, chunk);
+
 	if (!num_stripes) {
 		btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
 			  num_stripes);
@@ -6282,6 +6278,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 			  "invalid chunk logical %llu", logical);
 		return -EIO;
 	}
+	if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+		btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+			  btrfs_chunk_sector_size(leaf, chunk));
+		return -EIO;
+	}
 	if (!length || !IS_ALIGNED(length, root->sectorsize)) {
 		btrfs_err(root->fs_info,
 			"invalid chunk length %llu", length);
@@ -6293,13 +6294,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 		return -EIO;
 	}
 	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
-	    btrfs_chunk_type(leaf, chunk)) {
+	    type) {
 		btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
 			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
 			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
 			  btrfs_chunk_type(leaf, chunk));
 		return -EIO;
 	}
+	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+	     num_stripes != 1)) {
+		btrfs_err(root->fs_info,
+			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
+			num_stripes, sub_stripes,
+			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk)
+{
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct extent_map *em;
+	u64 logical;
+	u64 length;
+	u64 stripe_len;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+	ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+	if (ret)
+		return ret;
 
 	read_lock(&map_tree->map_tree.lock);
 	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6547,6 +6589,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	u32 array_size;
 	u32 len = 0;
 	u32 cur_offset;
+	u64 type;
 	struct btrfs_key key;
 
 	ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6613,6 +6656,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 				break;
 			}
 
+			type = btrfs_chunk_type(sb, chunk);
+			if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+				btrfs_err(root->fs_info,
+			    "invalid chunk type %llu in sys_array at offset %u",
+					type, cur_offset);
+				ret = -EIO;
+				break;
+			}
+
 			len = btrfs_chunk_item_size(num_stripes);
 			if (cur_offset + len > array_size)
 				goto out_short_read;
-- 
cgit v1.2.1