summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: Jeff Garzik <jgarzik@pobox.com> 2005-08-10 13:46:28 -0400
committer: Jeff Garzik <jgarzik@pobox.com> 2005-08-10 13:46:28 -0400
commit: 2f058256cb64e346f4fb4499ff4e0f1c2791a4b4 (patch)
tree: 91e06602f4d3abb6812ea8c9bc9ba4501e14c84e /drivers/md
parent: 0274aa2506fd2fe89a58dd6cd64d3b3f7b976af8 (diff)
parent: 86b3786078d63242d3194ffc58ae8dae1d1bbef3 (diff)
download: blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.tar.gz
download: blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.zip
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bitmap.c 92
-rw-r--r-- drivers/md/dm-ioctl.c 14
-rw-r--r-- drivers/md/dm-mpath.c 68
-rw-r--r-- drivers/md/dm-raid1.c 3
-rw-r--r-- drivers/md/dm-snap.c 6
-rw-r--r-- drivers/md/dm-table.c 7
-rw-r--r-- drivers/md/dm.c 207
-rw-r--r-- drivers/md/md.c 10
-rw-r--r-- drivers/md/raid0.c 8
-rw-r--r-- drivers/md/raid1.c 63
-rw-r--r-- drivers/md/raid5.c 2
-rw-r--r-- drivers/md/raid6main.c 2
12 files changed, 256 insertions, 226 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 95980ad6b27b..41df4cda66e2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -108,7 +108,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
{
unsigned char *page;
-#if INJECT_FAULTS_1
+#ifdef INJECT_FAULTS_1
page = NULL;
#else
page = kmalloc(PAGE_SIZE, GFP_NOIO);
@@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap)
return 0;
}
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
- unsigned long sectors, int in_sync);
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset);
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
* memory mapping of the bitmap file
@@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
* previously kicked from the array, we mark all the bits as
* 1's in order to cause a full resync.
*/
-static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
+static int bitmap_init_from_disk(struct bitmap *bitmap)
{
unsigned long i, chunks, index, oldindex, bit;
struct page *page = NULL, *oldpage = NULL;
@@ -843,7 +842,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
BUG_ON(!file && !bitmap->offset);
-#if INJECT_FAULTS_3
+#ifdef INJECT_FAULTS_3
outofdate = 1;
#else
outofdate = bitmap->flags & BITMAP_STALE;
@@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
}
if (test_bit(bit, page_address(page))) {
/* if the disk bit is set, set the memory bit */
- bitmap_set_memory_bits(bitmap,
- i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
+ bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap));
bit_cnt++;
}
}
@@ -1187,7 +1185,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
spin_unlock_irqrestore(&bitmap->lock, flags);
-#if INJECT_FATAL_FAULT_2
+#ifdef INJECT_FATAL_FAULT_2
daemon = NULL;
#else
sprintf(namebuf, "%%s_%s", name);
@@ -1345,7 +1343,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
}
}
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+ int degraded)
{
bitmap_counter_t *bmc;
int rv;
@@ -1362,8 +1361,10 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
rv = 1;
else if (NEEDED(*bmc)) {
rv = 1;
- *bmc |= RESYNC_MASK;
- *bmc &= ~NEEDED_MASK;
+ if (!degraded) { /* don't set/clear bits if degraded */
+ *bmc |= RESYNC_MASK;
+ *bmc &= ~NEEDED_MASK;
+ }
}
}
spin_unlock_irq(&bitmap->lock);
@@ -1423,35 +1424,53 @@ void bitmap_close_sync(struct bitmap *bitmap)
}
}
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
- unsigned long sectors, int in_sync)
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset)
{
/* For each chunk covered by any of these sectors, set the
- * counter to 1 and set resync_needed unless in_sync. They should all
+ * counter to 1 and set resync_needed. They should all
* be 0 at this point
*/
- while (sectors) {
- int secs;
- bitmap_counter_t *bmc;
- spin_lock_irq(&bitmap->lock);
- bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
- if (!bmc) {
- spin_unlock_irq(&bitmap->lock);
- return;
- }
- if (! *bmc) {
- struct page *page;
- *bmc = 1 | (in_sync? 0 : NEEDED_MASK);
- bitmap_count_page(bitmap, offset, 1);
- page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
- set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
- }
+
+ int secs;
+ bitmap_counter_t *bmc;
+ spin_lock_irq(&bitmap->lock);
+ bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
+ if (!bmc) {
spin_unlock_irq(&bitmap->lock);
- if (sectors > secs)
- sectors -= secs;
- else
- sectors = 0;
+ return;
+ }
+ if (! *bmc) {
+ struct page *page;
+ *bmc = 1 | NEEDED_MASK;
+ bitmap_count_page(bitmap, offset, 1);
+ page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
+ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
+ spin_unlock_irq(&bitmap->lock);
+
+}
+
+/*
+ * flush out any pending updates
+ */
+void bitmap_flush(mddev_t *mddev)
+{
+ struct bitmap *bitmap = mddev->bitmap;
+ int sleep;
+
+ if (!bitmap) /* there was no bitmap */
+ return;
+
+ /* run the daemon_work three times to ensure everything is flushed
+ * that can be
+ */
+ sleep = bitmap->daemon_sleep;
+ bitmap->daemon_sleep = 0;
+ bitmap_daemon_work(bitmap);
+ bitmap_daemon_work(bitmap);
+ bitmap_daemon_work(bitmap);
+ bitmap->daemon_sleep = sleep;
+ bitmap_update_sb(bitmap);
}
/*
@@ -1549,7 +1568,7 @@ int bitmap_create(mddev_t *mddev)
bitmap->syncchunk = ~0UL;
-#if INJECT_FATAL_FAULT_1
+#ifdef INJECT_FATAL_FAULT_1
bitmap->bp = NULL;
#else
bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
@@ -1562,7 +1581,8 @@ int bitmap_create(mddev_t *mddev)
/* now that we have some pages available, initialize the in-memory
* bitmap from the on-disk bitmap */
- err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector);
+ err = bitmap_init_from_disk(bitmap);
+
if (err)
return err;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index ee3c869d9701..200a0688f717 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -122,14 +122,6 @@ static struct hash_cell *__get_uuid_cell(const char *str)
/*-----------------------------------------------------------------
* Inserting, removing and renaming a device.
*---------------------------------------------------------------*/
-static inline char *kstrdup(const char *str)
-{
- char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
- if (r)
- strcpy(r, str);
- return r;
-}
-
static struct hash_cell *alloc_cell(const char *name, const char *uuid,
struct mapped_device *md)
{
@@ -139,7 +131,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
if (!hc)
return NULL;
- hc->name = kstrdup(name);
+ hc->name = kstrdup(name, GFP_KERNEL);
if (!hc->name) {
kfree(hc);
return NULL;
@@ -149,7 +141,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
hc->uuid = NULL;
else {
- hc->uuid = kstrdup(uuid);
+ hc->uuid = kstrdup(uuid, GFP_KERNEL);
if (!hc->uuid) {
kfree(hc->name);
kfree(hc);
@@ -273,7 +265,7 @@ static int dm_hash_rename(const char *old, const char *new)
/*
* duplicate new.
*/
- new_name = kstrdup(new);
+ new_name = kstrdup(new, GFP_KERNEL);
if (!new_name)
return -ENOMEM;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 0c1b8520ef86..785806bdb248 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -63,6 +63,7 @@ struct multipath {
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
+ unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
unsigned nr_valid_paths; /* Total number of usable paths */
struct pgpath *current_pgpath;
@@ -72,7 +73,7 @@ struct multipath {
unsigned queue_io; /* Must we queue all I/O? */
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
- unsigned suspended; /* Has dm core suspended our I/O? */
+ unsigned saved_queue_if_no_path;/* Saved state during suspension */
struct work_struct process_queued_ios;
struct bio_list queued_ios;
@@ -304,11 +305,12 @@ static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
m->queue_size--;
if ((pgpath && m->queue_io) ||
- (!pgpath && m->queue_if_no_path && !m->suspended)) {
+ (!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
bio_list_add(&m->queued_ios, bio);
m->queue_size++;
- if (m->pg_init_required || !m->queue_io)
+ if ((m->pg_init_required && !m->pg_init_in_progress) ||
+ !m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
r = 0;
@@ -333,8 +335,9 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path)
spin_lock_irqsave(&m->lock, flags);
+ m->saved_queue_if_no_path = m->queue_if_no_path;
m->queue_if_no_path = queue_if_no_path;
- if (!m->queue_if_no_path)
+ if (!m->queue_if_no_path && m->queue_size)
queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
@@ -379,25 +382,31 @@ static void process_queued_ios(void *data)
{
struct multipath *m = (struct multipath *) data;
struct hw_handler *hwh = &m->hw_handler;
- struct pgpath *pgpath;
- unsigned init_required, must_queue = 0;
+ struct pgpath *pgpath = NULL;
+ unsigned init_required = 0, must_queue = 1;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
+ if (!m->queue_size)
+ goto out;
+
if (!m->current_pgpath)
__choose_pgpath(m);
pgpath = m->current_pgpath;
- if ((pgpath && m->queue_io) ||
- (!pgpath && m->queue_if_no_path && !m->suspended))
- must_queue = 1;
+ if ((pgpath && !m->queue_io) ||
+ (!pgpath && !m->queue_if_no_path))
+ must_queue = 0;
- init_required = m->pg_init_required;
- if (init_required)
+ if (m->pg_init_required && !m->pg_init_in_progress) {
m->pg_init_required = 0;
+ m->pg_init_in_progress = 1;
+ init_required = 1;
+ }
+out:
spin_unlock_irqrestore(&m->lock, flags);
if (init_required)
@@ -752,6 +761,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
static void multipath_dtr(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;
+
+ flush_workqueue(kmultipathd);
free_multipath(m);
}
@@ -765,6 +776,9 @@ static int multipath_map(struct dm_target *ti, struct bio *bio,
struct mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
+ if (bio_barrier(bio))
+ return -EOPNOTSUPP;
+
mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
dm_bio_record(&mpio->details, bio);
@@ -837,7 +851,7 @@ static int reinstate_path(struct pgpath *pgpath)
pgpath->path.is_active = 1;
m->current_pgpath = NULL;
- if (!m->nr_valid_paths++)
+ if (!m->nr_valid_paths++ && m->queue_size)
queue_work(kmultipathd, &m->process_queued_ios);
queue_work(kmultipathd, &m->trigger_event);
@@ -963,12 +977,13 @@ void dm_pg_init_complete(struct path *path, unsigned err_flags)
bypass_pg(m, pg, 1);
spin_lock_irqsave(&m->lock, flags);
- if (!err_flags)
- m->queue_io = 0;
- else {
+ if (err_flags) {
m->current_pgpath = NULL;
m->current_pg = NULL;
- }
+ } else if (!m->pg_init_required)
+ m->queue_io = 0;
+
+ m->pg_init_in_progress = 0;
queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
}
@@ -988,9 +1003,12 @@ static int do_end_io(struct multipath *m, struct bio *bio,
if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
return error;
+ if (error == -EOPNOTSUPP)
+ return error;
+
spin_lock(&m->lock);
if (!m->nr_valid_paths) {
- if (!m->queue_if_no_path || m->suspended) {
+ if (!m->queue_if_no_path) {
spin_unlock(&m->lock);
return -EIO;
} else {
@@ -1051,27 +1069,27 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
/*
* Suspend can't complete until all the I/O is processed so if
- * the last path failed we will now error any queued I/O.
+ * the last path fails we must error any remaining I/O.
+ * Note that if the freeze_bdev fails while suspending, the
+ * queue_if_no_path state is lost - userspace should reset it.
*/
static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
- m->suspended = 1;
- if (m->queue_if_no_path)
- queue_work(kmultipathd, &m->process_queued_ios);
- spin_unlock_irqrestore(&m->lock, flags);
+ queue_if_no_path(m, 0);
}
+/*
+ * Restore the queue_if_no_path setting.
+ */
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
- m->suspended = 0;
+ m->queue_if_no_path = m->saved_queue_if_no_path;
spin_unlock_irqrestore(&m->lock, flags);
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6e3cf7e13451..b08df8b9b2ca 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1060,6 +1060,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = ms;
+ ti->split_io = ms->rh.region_size;
r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
if (r) {
@@ -1229,7 +1230,7 @@ static int __init dm_mirror_init(void)
if (r)
return r;
- _kmirrord_wq = create_workqueue("kmirrord");
+ _kmirrord_wq = create_singlethread_workqueue("kmirrord");
if (!_kmirrord_wq) {
DMERR("couldn't start kmirrord");
dm_dirty_log_exit();
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7e691ab9a748..ab54f99b7c3b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -777,7 +777,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
/* Full snapshots are not usable */
if (!s->valid)
- return -1;
+ return -EIO;
/*
* Write to snapshot - higher level takes care of RW/RO
@@ -931,6 +931,10 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
if (!snap->valid)
continue;
+ /* Nothing to do if writing beyond end of snapshot */
+ if (bio->bi_sector >= dm_table_get_size(snap->table))
+ continue;
+
down_write(&snap->lock);
/*
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 18e9b9953fcd..a6d3baa46f61 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -869,11 +869,17 @@ static void suspend_targets(struct dm_table *t, unsigned postsuspend)
void dm_table_presuspend_targets(struct dm_table *t)
{
+ if (!t)
+ return;
+
return suspend_targets(t, 0);
}
void dm_table_postsuspend_targets(struct dm_table *t)
{
+ if (!t)
+ return;
+
return suspend_targets(t, 1);
}
@@ -943,6 +949,7 @@ EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);
EXPORT_SYMBOL(dm_table_event);
+EXPORT_SYMBOL(dm_table_get_size);
EXPORT_SYMBOL(dm_table_get_mode);
EXPORT_SYMBOL(dm_table_put);
EXPORT_SYMBOL(dm_table_get);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f6b03957efc7..d487d9deb98e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -55,10 +55,10 @@ union map_info *dm_get_mapinfo(struct bio *bio)
*/
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1
-#define DMF_FS_LOCKED 2
struct mapped_device {
- struct rw_semaphore lock;
+ struct rw_semaphore io_lock;
+ struct semaphore suspend_lock;
rwlock_t map_lock;
atomic_t holders;
@@ -248,16 +248,16 @@ static inline void free_tio(struct mapped_device *md, struct target_io *tio)
*/
static int queue_io(struct mapped_device *md, struct bio *bio)
{
- down_write(&md->lock);
+ down_write(&md->io_lock);
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
- up_write(&md->lock);
+ up_write(&md->io_lock);
return 1;
}
bio_list_add(&md->deferred, bio);
- up_write(&md->lock);
+ up_write(&md->io_lock);
return 0; /* deferred successfully */
}
@@ -384,7 +384,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
/* error the io and bail out */
struct dm_io *io = tio->io;
free_tio(tio->io->md, tio);
- dec_pending(io, -EIO);
+ dec_pending(io, r);
bio_put(clone);
}
}
@@ -568,14 +568,14 @@ static int dm_request(request_queue_t *q, struct bio *bio)
int r;
struct mapped_device *md = q->queuedata;
- down_read(&md->lock);
+ down_read(&md->io_lock);
/*
* If we're suspended we have to queue
* this io for later.
*/
while (test_bit(DMF_BLOCK_IO, &md->flags)) {
- up_read(&md->lock);
+ up_read(&md->io_lock);
if (bio_rw(bio) == READA) {
bio_io_error(bio, bio->bi_size);
@@ -594,11 +594,11 @@ static int dm_request(request_queue_t *q, struct bio *bio)
* We're in a while loop, because someone could suspend
* before we get to the following read lock.
*/
- down_read(&md->lock);
+ down_read(&md->io_lock);
}
__split_bio(md, bio);
- up_read(&md->lock);
+ up_read(&md->io_lock);
return 0;
}
@@ -610,7 +610,7 @@ static int dm_flush_all(request_queue_t *q, struct gendisk *disk,
int ret = -ENXIO;
if (map) {
- ret = dm_table_flush_all(md->map);
+ ret = dm_table_flush_all(map);
dm_table_put(map);
}
@@ -747,7 +747,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
goto bad1;
memset(md, 0, sizeof(*md));
- init_rwsem(&md->lock);
+ init_rwsem(&md->io_lock);
+ init_MUTEX(&md->suspend_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->event_nr, 0);
@@ -825,18 +826,13 @@ static void event_callback(void *context)
wake_up(&md->eventq);
}
-static void __set_size(struct gendisk *disk, sector_t size)
+static void __set_size(struct mapped_device *md, sector_t size)
{
- struct block_device *bdev;
-
- set_capacity(disk, size);
- bdev = bdget_disk(disk, 0);
- if (bdev) {
- down(&bdev->bd_inode->i_sem);
- i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
- up(&bdev->bd_inode->i_sem);
- bdput(bdev);
- }
+ set_capacity(md->disk, size);
+
+ down(&md->frozen_bdev->bd_inode->i_sem);
+ i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+ up(&md->frozen_bdev->bd_inode->i_sem);
}
static int __bind(struct mapped_device *md, struct dm_table *t)
@@ -845,17 +841,18 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
sector_t size;
size = dm_table_get_size(t);
- __set_size(md->disk, size);
+ __set_size(md, size);
if (size == 0)
return 0;
+ dm_table_get(t);
+ dm_table_event_callback(t, event_callback, md);
+
write_lock(&md->map_lock);
md->map = t;
+ dm_table_set_restrictions(t, q);
write_unlock(&md->map_lock);
- dm_table_get(t);
- dm_table_event_callback(md->map, event_callback, md);
- dm_table_set_restrictions(t, q);
return 0;
}
@@ -935,7 +932,7 @@ void dm_put(struct mapped_device *md)
struct dm_table *map = dm_get_table(md);
if (atomic_dec_and_test(&md->holders)) {
- if (!test_bit(DMF_SUSPENDED, &md->flags) && map) {
+ if (!dm_suspended(md)) {
dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map);
}
@@ -966,39 +963,33 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
*/
int dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
- int r;
+ int r = -EINVAL;
- down_write(&md->lock);
+ down(&md->suspend_lock);
/* device must be suspended */
- if (!test_bit(DMF_SUSPENDED, &md->flags)) {
- up_write(&md->lock);
- return -EPERM;
- }
+ if (!dm_suspended(md))
+ goto out;
__unbind(md);
r = __bind(md, table);
- if (r)
- return r;
- up_write(&md->lock);
- return 0;
+out:
+ up(&md->suspend_lock);
+ return r;
}
/*
* Functions to lock and unlock any filesystem running on the
* device.
*/
-static int __lock_fs(struct mapped_device *md)
+static int lock_fs(struct mapped_device *md)
{
- int error = -ENOMEM;
-
- if (test_and_set_bit(DMF_FS_LOCKED, &md->flags))
- return 0;
+ int r = -ENOMEM;
md->frozen_bdev = bdget_disk(md->disk, 0);
if (!md->frozen_bdev) {
- DMWARN("bdget failed in __lock_fs");
+ DMWARN("bdget failed in lock_fs");
goto out;
}
@@ -1006,13 +997,13 @@ static int __lock_fs(struct mapped_device *md)
md->frozen_sb = freeze_bdev(md->frozen_bdev);
if (IS_ERR(md->frozen_sb)) {
- error = PTR_ERR(md->frozen_sb);
+ r = PTR_ERR(md->frozen_sb);
goto out_bdput;
}
/* don't bdput right now, we don't want the bdev
* to go away while it is locked. We'll bdput
- * in __unlock_fs
+ * in unlock_fs
*/
return 0;
@@ -1021,15 +1012,11 @@ out_bdput:
md->frozen_sb = NULL;
md->frozen_bdev = NULL;
out:
- clear_bit(DMF_FS_LOCKED, &md->flags);
- return error;
+ return r;
}
-static void __unlock_fs(struct mapped_device *md)
+static void unlock_fs(struct mapped_device *md)
{
- if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags))
- return;
-
thaw_bdev(md->frozen_bdev, md->frozen_sb);
bdput(md->frozen_bdev);
@@ -1046,47 +1033,37 @@ static void __unlock_fs(struct mapped_device *md)
*/
int dm_suspend(struct mapped_device *md)
{
- struct dm_table *map;
+ struct dm_table *map = NULL;
DECLARE_WAITQUEUE(wait, current);
- int error = -EINVAL;
+ int r = -EINVAL;
- /* Flush I/O to the device. */
- down_read(&md->lock);
- if (test_bit(DMF_BLOCK_IO, &md->flags))
- goto out_read_unlock;
+ down(&md->suspend_lock);
- error = __lock_fs(md);
- if (error)
- goto out_read_unlock;
+ if (dm_suspended(md))
+ goto out;
map = dm_get_table(md);
- if (map)
- dm_table_presuspend_targets(map);
- up_read(&md->lock);
+ /* This does not get reverted if there's an error later. */
+ dm_table_presuspend_targets(map);
+
+ /* Flush I/O to the device. */
+ r = lock_fs(md);
+ if (r)
+ goto out;
/*
* First we set the BLOCK_IO flag so no more ios will be mapped.
- *
- * If the flag is already set we know another thread is trying to
- * suspend as well, so we leave the fs locked for this thread.
*/
- error = -EINVAL;
- down_write(&md->lock);
- if (test_and_set_bit(DMF_BLOCK_IO, &md->flags)) {
- if (map)
- dm_table_put(map);
- goto out_write_unlock;
- }
+ down_write(&md->io_lock);
+ set_bit(DMF_BLOCK_IO, &md->flags);
add_wait_queue(&md->wait, &wait);
- up_write(&md->lock);
+ up_write(&md->io_lock);
/* unplug */
- if (map) {
+ if (map)
dm_table_unplug_all(map);
- dm_table_put(map);
- }
/*
* Then we wait for the already mapped ios to
@@ -1102,63 +1079,67 @@ int dm_suspend(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
- down_write(&md->lock);
+ down_write(&md->io_lock);
remove_wait_queue(&md->wait, &wait);
/* were we interrupted ? */
- error = -EINTR;
- if (atomic_read(&md->pending))
- goto out_unfreeze;
+ r = -EINTR;
+ if (atomic_read(&md->pending)) {
+ up_write(&md->io_lock);
+ unlock_fs(md);
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+ goto out;
+ }
+ up_write(&md->io_lock);
- set_bit(DMF_SUSPENDED, &md->flags);
+ dm_table_postsuspend_targets(map);
- map = dm_get_table(md);
- if (map)
- dm_table_postsuspend_targets(map);
- dm_table_put(map);
- up_write(&md->lock);
-
- return 0;
+ set_bit(DMF_SUSPENDED, &md->flags);
-out_unfreeze:
- /* FIXME Undo dm_table_presuspend_targets */
- __unlock_fs(md);
- clear_bit(DMF_BLOCK_IO, &md->flags);
-out_write_unlock:
- up_write(&md->lock);
- return error;
+ r = 0;
-out_read_unlock:
- up_read(&md->lock);
- return error;
+out:
+ dm_table_put(map);
+ up(&md->suspend_lock);
+ return r;
}
int dm_resume(struct mapped_device *md)
{
+ int r = -EINVAL;
struct bio *def;
- struct dm_table *map = dm_get_table(md);
+ struct dm_table *map = NULL;
- down_write(&md->lock);
- if (!map ||
- !test_bit(DMF_SUSPENDED, &md->flags) ||
- !dm_table_get_size(map)) {
- up_write(&md->lock);
- dm_table_put(map);
- return -EINVAL;
- }
+ down(&md->suspend_lock);
+ if (!dm_suspended(md))
+ goto out;
+
+ map = dm_get_table(md);
+ if (!map || !dm_table_get_size(map))
+ goto out;
dm_table_resume_targets(map);
- clear_bit(DMF_SUSPENDED, &md->flags);
+
+ down_write(&md->io_lock);
clear_bit(DMF_BLOCK_IO, &md->flags);
def = bio_list_get(&md->deferred);
__flush_deferred_io(md, def);
- up_write(&md->lock);
- __unlock_fs(md);
+ up_write(&md->io_lock);
+
+ unlock_fs(md);
+
+ clear_bit(DMF_SUSPENDED, &md->flags);
+
dm_table_unplug_all(map);
+
+ r = 0;
+
+out:
dm_table_put(map);
+ up(&md->suspend_lock);
- return 0;
+ return r;
}
/*-----------------------------------------------------------------
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c6b5b6baff6..480f658db6f2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -284,7 +284,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
return NULL;
}
-inline static sector_t calc_dev_sboffset(struct block_device *bdev)
+static inline sector_t calc_dev_sboffset(struct block_device *bdev)
{
sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
return MD_NEW_SIZE_BLOCKS(size);
@@ -338,6 +338,7 @@ static int super_written(struct bio *bio, unsigned int bytes_done, int error)
if (atomic_dec_and_test(&rdev->mddev->pending_writes))
wake_up(&rdev->mddev->sb_wait);
+ bio_put(bio);
return 0;
}
@@ -1797,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
goto out;
mddev->ro = 1;
} else {
+ bitmap_flush(mddev);
+ wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
if (mddev->ro)
set_disk_ro(disk, 0);
blk_queue_make_request(mddev->queue, md_fail_request);
@@ -2976,8 +2979,7 @@ static int md_thread(void * arg)
wait_event_interruptible_timeout(thread->wqueue,
test_bit(THREAD_WAKEUP, &thread->flags),
thread->timeout);
- if (current->flags & PF_FREEZE)
- refrigerator(PF_FREEZE);
+ try_to_freeze();
clear_bit(THREAD_WAKEUP, &thread->flags);
@@ -3484,7 +3486,6 @@ static void md_do_sync(mddev_t *mddev)
goto skip;
}
ITERATE_MDDEV(mddev2,tmp) {
- printk(".");
if (mddev2 == mddev)
continue;
if (mddev2->curr_resync &&
@@ -4007,3 +4008,4 @@ EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_print_devices);
EXPORT_SYMBOL(md_check_recovery);
MODULE_LICENSE("GPL");
+MODULE_ALIAS("md");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e11dd14d0b43..2120710172c5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -314,16 +314,16 @@ static int raid0_run (mddev_t *mddev)
sector_t space = conf->hash_spacing;
int round;
conf->preshift = 0;
- if (sizeof(sector_t) > sizeof(unsigned long)) {
+ if (sizeof(sector_t) > sizeof(u32)) {
/*shift down space and s so that sector_div will work */
- while (space > (sector_t) (~(unsigned long)0)) {
+ while (space > (sector_t) (~(u32)0)) {
s >>= 1;
space >>= 1;
s += 1; /* force round-up */
conf->preshift++;
}
}
- round = sector_div(s, (unsigned long)space) ? 1 : 0;
+ round = sector_div(s, (u32)space) ? 1 : 0;
nb_zone = s + round;
}
printk("raid0 : nb_zone is %d.\n", nb_zone);
@@ -443,7 +443,7 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
volatile
#endif
sector_t x = block >> conf->preshift;
- sector_div(x, (unsigned long)conf->hash_spacing);
+ sector_div(x, (u32)conf->hash_spacing);
zone = conf->hash_table[x];
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff1dbec864af..51d9645ed09c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
if (!uptodate) {
md_error(r1_bio->mddev,
conf->mirrors[r1_bio->read_disk].rdev);
- set_bit(R1BIO_Degraded, &r1_bio->state);
} else
set_bit(R1BIO_Uptodate, &r1_bio->state);
rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
@@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
mirror = i;
break;
}
- if (!uptodate) {
+ if (!uptodate)
md_error(mddev, conf->mirrors[mirror].rdev);
- set_bit(R1BIO_Degraded, &r1_bio->state);
- }
+
update_head_pos(mirror, r1_bio);
if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
int i;
int write_targets = 0;
int sync_blocks;
+ int still_degraded = 0;
if (!conf->r1buf_pool)
{
@@ -1126,21 +1125,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
* only be one in raid1 resync.
* We can find the current addess in mddev->curr_resync
*/
- if (!conf->fullsync) {
- if (mddev->curr_resync < max_sector)
- bitmap_end_sync(mddev->bitmap,
- mddev->curr_resync,
+ if (mddev->curr_resync < max_sector) /* aborted */
+ bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
&sync_blocks, 1);
- bitmap_close_sync(mddev->bitmap);
- }
- if (mddev->curr_resync >= max_sector)
+ else /* completed sync */
conf->fullsync = 0;
+
+ bitmap_close_sync(mddev->bitmap);
close_sync(conf);
return 0;
}
- if (!conf->fullsync &&
- !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
+ /* before building a request, check if we can skip these blocks..
+ * This call to bitmap_start_sync doesn't actually record anything
+ */
+ if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+ !conf->fullsync) {
/* We can skip this block, and probably several more */
*skipped = 1;
return sync_blocks;
@@ -1205,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (i == disk) {
bio->bi_rw = READ;
bio->bi_end_io = end_sync_read;
- } else if (conf->mirrors[i].rdev &&
- !conf->mirrors[i].rdev->faulty &&
- (!conf->mirrors[i].rdev->in_sync ||
- sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) {
+ } else if (conf->mirrors[i].rdev == NULL ||
+ conf->mirrors[i].rdev->faulty) {
+ still_degraded = 1;
+ continue;
+ } else if (!conf->mirrors[i].rdev->in_sync ||
+ sector_nr + RESYNC_SECTORS > mddev->recovery_cp) {
bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write;
write_targets ++;
} else
+ /* no need to read or write here */
continue;
bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
bio->bi_private = r1_bio;
}
- if (write_targets + 1 < conf->raid_disks)
- /* array degraded, can't clear bitmap */
- set_bit(R1BIO_Degraded, &r1_bio->state);
-
if (write_targets == 0) {
/* There is nowhere to write, so all non-sync
* drives must be failed - so we are finished
@@ -1243,15 +1242,15 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
len = (max_sector - sector_nr) << 9;
if (len == 0)
break;
- if (!conf->fullsync) {
- if (sync_blocks == 0) {
- if (!bitmap_start_sync(mddev->bitmap,
- sector_nr, &sync_blocks))
- break;
- if (sync_blocks < (PAGE_SIZE>>9))
- BUG();
- if (len > (sync_blocks<<9)) len = sync_blocks<<9;
- }
+ if (sync_blocks == 0) {
+ if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+ &sync_blocks, still_degraded) &&
+ !conf->fullsync)
+ break;
+ if (sync_blocks < (PAGE_SIZE>>9))
+ BUG();
+ if (len > (sync_blocks<<9))
+ len = sync_blocks<<9;
}
for (i=0 ; i < conf->raid_disks; i++) {
@@ -1264,7 +1263,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
while (i > 0) {
i--;
bio = r1_bio->bios[i];
- if (bio->bi_end_io==NULL) continue;
+ if (bio->bi_end_io==NULL)
+ continue;
/* remove last page from this bio */
bio->bi_vcnt--;
bio->bi_size -= len;
@@ -1469,6 +1469,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->size = mddev->array_size;
+ mddev->resync_max_sectors = sectors;
return 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 93a9726cc2d6..43f231a467d5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev)
/* device size must be a multiple of chunk size */
mddev->size &= ~(mddev->chunk_size/1024 -1);
+ mddev->resync_max_sectors = mddev->size << 1;
if (!conf->chunk_size || conf->chunk_size % 4) {
printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
@@ -1931,6 +1932,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->size = sectors /2;
+ mddev->resync_max_sectors = sectors;
return 0;
}
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index f62ea1a73d0d..495dee1d1e83 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev)
/* device size must be a multiple of chunk size */
mddev->size &= ~(mddev->chunk_size/1024 -1);
+ mddev->resync_max_sectors = mddev->size << 1;
if (conf->raid_disks < 4) {
printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
@@ -2095,6 +2096,7 @@ static int raid6_resize(mddev_t *mddev, sector_t sectors)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->size = sectors /2;
+ mddev->resync_max_sectors = sectors;
return 0;
}
OpenPOWER on IntegriCloud