summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2012-02-20 17:52:46 -0500
committerTheodore Ts'o <tytso@mit.edu>2012-02-20 17:52:46 -0500
commit813e57276fd909f7d5a816ef7ca706fca491ee61 (patch)
treecc2d262c0f6524f1a18d2a168d03e2ae3eb624ab
parent119c0d4460b001e44b41dcf73dc6ee794b98bd31 (diff)
downloadtalos-op-linux-813e57276fd909f7d5a816ef7ca706fca491ee61.tar.gz
talos-op-linux-813e57276fd909f7d5a816ef7ca706fca491ee61.zip
ext4: fix race when setting bitmap_uptodate flag
In ext4_read_{inode,block}_bitmap() we were setting bitmap_uptodate() before submitting the buffer for read. The is bad, since we check bitmap_uptodate() without locking the buffer, and so if another process is racing with us, it's possible that they will think the bitmap is uptodate even though the read has not completed yet, resulting in inodes and blocks potentially getting allocated more than once if we get really unlucky. Addresses-Google-Bug: 2828254 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/balloc.c63
-rw-r--r--fs/ext4/ext4.h11
-rw-r--r--fs/ext4/ialloc.c26
-rw-r--r--fs/ext4/mballoc.c79
4 files changed, 86 insertions, 93 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9e2cd8cf711..a2cff2b9d5b5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -336,10 +336,10 @@ err_out:
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *
-ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh;
ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, "Cannot read block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
+ ext4_error(sb, "Cannot get buffer for block bitmap - "
+ "block_group = %u, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
+ set_buffer_new(bh);
trace_ext4_read_block_bitmap_load(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
- put_bh(bh);
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ, bh);
+ return bh;
+}
+
+/* Returns 0 on success, 1 on error */
+int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ struct ext4_group_desc *desc;
+
+ if (!buffer_new(bh))
+ return 0;
+ desc = ext4_get_group_desc(sb, block_group, NULL);
+ if (!desc)
+ return 1;
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
ext4_error(sb, "Cannot read block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
- return NULL;
+ "block_group = %u, block_bitmap = %llu",
+ block_group, bh->b_blocknr);
+ return 1;
}
+ clear_buffer_new(bh);
+ /* Panic or remount fs read-only if block bitmap is invalid */
ext4_valid_block_bitmap(sb, desc, block_group, bh);
- /*
- * file system mounted not to panic on error,
- * continue with corrupt bitmap
- */
+ return 0;
+}
+
+struct buffer_head *
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+{
+ struct buffer_head *bh;
+
+ bh = ext4_read_block_bitmap_nowait(sb, block_group);
+ if (ext4_wait_block_bitmap(sb, block_group, bh)) {
+ put_bh(bh);
+ return NULL;
+ }
return bh;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 513004fc3d84..315f01be73e6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1794,8 +1794,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
- ext4_group_t block_group);
+
+extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+ ext4_group_t block_group);
+extern int ext4_wait_block_bitmap(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh);
+extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+ ext4_group_t block_group);
extern void ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
@@ -1841,6 +1847,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
+extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
extern long ext4_mb_stats;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 84e6e9a3986b..b2685992fb2d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
return EXT4_INODES_PER_GROUP(sb);
}
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+ if (uptodate) {
+ set_buffer_uptodate(bh);
+ set_bitmap_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
/*
* Read the inode allocation bitmap for a given block_group, reading
* into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
+ * submit the buffer_head for reading
*/
trace_ext4_load_inode_bitmap(sb, block_group);
- set_bitmap_uptodate(bh);
- if (bh_submit_read(bh) < 0) {
+ bh->b_end_io = ext4_end_bitmap_read;
+ get_bh(bh);
+ submit_bh(READ, bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
put_bh(bh);
ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
+ "block_group = %u, inode_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
return bh;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cb990b21c698..545fa0256606 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -782,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
int groups_per_page;
int err = 0;
int i;
- ext4_group_t first_group;
+ ext4_group_t first_group, group;
int first_block;
struct super_block *sb;
struct buffer_head *bhs;
@@ -806,24 +806,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* allocate buffer_heads to read bitmaps */
if (groups_per_page > 1) {
- err = -ENOMEM;
i = sizeof(struct buffer_head *) * groups_per_page;
bh = kzalloc(i, GFP_NOFS);
- if (bh == NULL)
+ if (bh == NULL) {
+ err = -ENOMEM;
goto out;
+ }
} else
bh = &bhs;
first_group = page->index * blocks_per_page / 2;
/* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
- struct ext4_group_desc *desc;
-
- if (first_group + i >= ngroups)
+ for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+ if (group >= ngroups)
break;
- grinfo = ext4_get_group_info(sb, first_group + i);
+ grinfo = ext4_get_group_info(sb, group);
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
@@ -834,69 +833,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
bh[i] = NULL;
continue;
}
-
- err = -EIO;
- desc = ext4_get_group_desc(sb, first_group + i, NULL);
- if (desc == NULL)
- goto out;
-
- err = -ENOMEM;
- bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
- if (bh[i] == NULL)
+ if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
+ err = -ENOMEM;
goto out;
-
- if (bitmap_uptodate(bh[i]))
- continue;
-
- lock_buffer(bh[i]);
- if (bitmap_uptodate(bh[i])) {
- unlock_buffer(bh[i]);
- continue;
}
- ext4_lock_group(sb, first_group + i);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- ext4_init_block_bitmap(sb, bh[i],
- first_group + i, desc);
- set_bitmap_uptodate(bh[i]);
- set_buffer_uptodate(bh[i]);
- ext4_unlock_group(sb, first_group + i);
- unlock_buffer(bh[i]);
- continue;
- }
- ext4_unlock_group(sb, first_group + i);
- if (buffer_uptodate(bh[i])) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh[i]);
- unlock_buffer(bh[i]);
- continue;
- }
- get_bh(bh[i]);
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh[i]);
- bh[i]->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh[i]);
- mb_debug(1, "read bitmap for group %u\n", first_group + i);
+ mb_debug(1, "read bitmap for group %u\n", group);
}
/* wait for I/O completion */
- for (i = 0; i < groups_per_page; i++)
- if (bh[i])
- wait_on_buffer(bh[i]);
-
- err = -EIO;
- for (i = 0; i < groups_per_page; i++)
- if (bh[i] && !buffer_uptodate(bh[i]))
+ for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+ if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
+ err = -EIO;
goto out;
+ }
+ }
- err = 0;
first_block = page->index * blocks_per_page;
for (i = 0; i < blocks_per_page; i++) {
int group;
OpenPOWER on IntegriCloud