diff options
author | Jeff Garzik <jgarzik@pobox.com> | 2005-08-10 13:46:28 -0400 |
---|---|---|
committer | Jeff Garzik <jgarzik@pobox.com> | 2005-08-10 13:46:28 -0400 |
commit | 2f058256cb64e346f4fb4499ff4e0f1c2791a4b4 (patch) | |
tree | 91e06602f4d3abb6812ea8c9bc9ba4501e14c84e /drivers/md | |
parent | 0274aa2506fd2fe89a58dd6cd64d3b3f7b976af8 (diff) | |
parent | 86b3786078d63242d3194ffc58ae8dae1d1bbef3 (diff) | |
download | blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.tar.gz blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.zip |
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 92 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 14 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 68 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 7 | ||||
-rw-r--r-- | drivers/md/dm.c | 207 | ||||
-rw-r--r-- | drivers/md/md.c | 10 | ||||
-rw-r--r-- | drivers/md/raid0.c | 8 | ||||
-rw-r--r-- | drivers/md/raid1.c | 63 | ||||
-rw-r--r-- | drivers/md/raid5.c | 2 | ||||
-rw-r--r-- | drivers/md/raid6main.c | 2 |
12 files changed, 256 insertions, 226 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 95980ad6b27b..41df4cda66e2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -108,7 +108,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) { unsigned char *page; -#if INJECT_FAULTS_1 +#ifdef INJECT_FAULTS_1 page = NULL; #else page = kmalloc(PAGE_SIZE, GFP_NOIO); @@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap) return 0; } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync); +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset); /* * bitmap_init_from_disk -- called at bitmap_create time to initialize * the in-memory bitmap from the on-disk bitmap -- also, sets up the * memory mapping of the bitmap file @@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, * previously kicked from the array, we mark all the bits as * 1's in order to cause a full resync. */ -static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) +static int bitmap_init_from_disk(struct bitmap *bitmap) { unsigned long i, chunks, index, oldindex, bit; struct page *page = NULL, *oldpage = NULL; @@ -843,7 +842,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) BUG_ON(!file && !bitmap->offset); -#if INJECT_FAULTS_3 +#ifdef INJECT_FAULTS_3 outofdate = 1; #else outofdate = bitmap->flags & BITMAP_STALE; @@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) } if (test_bit(bit, page_address(page))) { /* if the disk bit is set, set the memory bit */ - bitmap_set_memory_bits(bitmap, - i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync); + bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap)); bit_cnt++; } } @@ -1187,7 +1185,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr, spin_unlock_irqrestore(&bitmap->lock, flags); -#if INJECT_FATAL_FAULT_2 +#ifdef INJECT_FATAL_FAULT_2 daemon = NULL; #else sprintf(namebuf, "%%s_%s", name); @@ -1345,7 +1343,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto } } -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks) +int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, + int degraded) { bitmap_counter_t *bmc; int rv; @@ -1362,8 +1361,10 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks) rv = 1; else if (NEEDED(*bmc)) { rv = 1; - *bmc |= RESYNC_MASK; - *bmc &= ~NEEDED_MASK; + if (!degraded) { /* don't set/clear bits if degraded */ + *bmc |= RESYNC_MASK; + *bmc &= ~NEEDED_MASK; + } } } spin_unlock_irq(&bitmap->lock); @@ -1423,35 +1424,53 @@ void bitmap_close_sync(struct bitmap *bitmap) } } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync) +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset) { /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed unless in_sync. They should all + * counter to 1 and set resync_needed. They should all * be 0 at this point */ - while (sectors) { - int secs; - bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &secs, 1); - if (!bmc) { - spin_unlock_irq(&bitmap->lock); - return; - } - if (! *bmc) { - struct page *page; - *bmc = 1 | (in_sync? 0 : NEEDED_MASK); - bitmap_count_page(bitmap, offset, 1); - page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); - } + + int secs; + bitmap_counter_t *bmc; + spin_lock_irq(&bitmap->lock); + bmc = bitmap_get_counter(bitmap, offset, &secs, 1); + if (!bmc) { spin_unlock_irq(&bitmap->lock); - if (sectors > secs) - sectors -= secs; - else - sectors = 0; + return; + } + if (! *bmc) { + struct page *page; + *bmc = 1 | NEEDED_MASK; + bitmap_count_page(bitmap, offset, 1); + page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); + set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } + spin_unlock_irq(&bitmap->lock); + +} + +/* + * flush out any pending updates + */ +void bitmap_flush(mddev_t *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + int sleep; + + if (!bitmap) /* there was no bitmap */ + return; + + /* run the daemon_work three time to ensure everything is flushed + * that can be + */ + sleep = bitmap->daemon_sleep; + bitmap->daemon_sleep = 0; + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap->daemon_sleep = sleep; + bitmap_update_sb(bitmap); } /* @@ -1549,7 +1568,7 @@ int bitmap_create(mddev_t *mddev) bitmap->syncchunk = ~0UL; -#if INJECT_FATAL_FAULT_1 +#ifdef INJECT_FATAL_FAULT_1 bitmap->bp = NULL; #else bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); @@ -1562,7 +1581,8 @@ int bitmap_create(mddev_t *mddev) /* now that we have some pages available, initialize the in-memory * bitmap from the on-disk bitmap */ - err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector); + err = bitmap_init_from_disk(bitmap); + if (err) return err; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ee3c869d9701..200a0688f717 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -122,14 +122,6 @@ static struct hash_cell *__get_uuid_cell(const char *str) /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ -static inline char *kstrdup(const char *str) -{ - char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); - if (r) - strcpy(r, str); - return r; -} - static struct hash_cell *alloc_cell(const char *name, const char *uuid, struct mapped_device *md) { @@ -139,7 +131,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, if (!hc) return NULL; - hc->name = kstrdup(name); + hc->name = kstrdup(name, GFP_KERNEL); if (!hc->name) { kfree(hc); return NULL; @@ -149,7 +141,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, hc->uuid = NULL; else { - hc->uuid = kstrdup(uuid); + hc->uuid = kstrdup(uuid, GFP_KERNEL); if (!hc->uuid) { kfree(hc->name); kfree(hc); @@ -273,7 +265,7 @@ static int dm_hash_rename(const char *old, const char *new) /* * duplicate new. */ - new_name = kstrdup(new); + new_name = kstrdup(new, GFP_KERNEL); if (!new_name) return -ENOMEM; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 0c1b8520ef86..785806bdb248 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -63,6 +63,7 @@ struct multipath { unsigned nr_priority_groups; struct list_head priority_groups; unsigned pg_init_required; /* pg_init needs calling? */ + unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ unsigned nr_valid_paths; /* Total number of usable paths */ struct pgpath *current_pgpath; @@ -72,7 +73,7 @@ struct multipath { unsigned queue_io; /* Must we queue all I/O? */ unsigned queue_if_no_path; /* Queue I/O if last path fails? */ - unsigned suspended; /* Has dm core suspended our I/O? */ + unsigned saved_queue_if_no_path;/* Saved state during suspension */ struct work_struct process_queued_ios; struct bio_list queued_ios; @@ -304,11 +305,12 @@ static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio, m->queue_size--; if ((pgpath && m->queue_io) || - (!pgpath && m->queue_if_no_path && !m->suspended)) { + (!pgpath && m->queue_if_no_path)) { /* Queue for the daemon to resubmit */ bio_list_add(&m->queued_ios, bio); m->queue_size++; - if (m->pg_init_required || !m->queue_io) + if ((m->pg_init_required && !m->pg_init_in_progress) || + !m->queue_io) queue_work(kmultipathd, &m->process_queued_ios); pgpath = NULL; r = 0; @@ -333,8 +335,9 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path) spin_lock_irqsave(&m->lock, flags); + m->saved_queue_if_no_path = m->queue_if_no_path; m->queue_if_no_path = queue_if_no_path; - if (!m->queue_if_no_path) + if (!m->queue_if_no_path && m->queue_size) queue_work(kmultipathd, &m->process_queued_ios); spin_unlock_irqrestore(&m->lock, flags); @@ -379,25 +382,31 @@ static void process_queued_ios(void *data) { struct multipath *m = (struct multipath *) data; struct hw_handler *hwh = &m->hw_handler; - struct pgpath *pgpath; - unsigned init_required, must_queue = 0; + struct pgpath *pgpath = NULL; + unsigned init_required = 0, must_queue = 1; unsigned long flags; spin_lock_irqsave(&m->lock, flags); + if (!m->queue_size) + goto out; + if (!m->current_pgpath) __choose_pgpath(m); pgpath = m->current_pgpath; - if ((pgpath && m->queue_io) || - (!pgpath && m->queue_if_no_path && !m->suspended)) - must_queue = 1; + if ((pgpath && !m->queue_io) || + (!pgpath && !m->queue_if_no_path)) + must_queue = 0; - init_required = m->pg_init_required; - if (init_required) + if (m->pg_init_required && !m->pg_init_in_progress) { m->pg_init_required = 0; + m->pg_init_in_progress = 1; + init_required = 1; + } +out: spin_unlock_irqrestore(&m->lock, flags); if (init_required) @@ -752,6 +761,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, static void multipath_dtr(struct dm_target *ti) { struct multipath *m = (struct multipath *) ti->private; + + flush_workqueue(kmultipathd); free_multipath(m); } @@ -765,6 +776,9 @@ static int multipath_map(struct dm_target *ti, struct bio *bio, struct mpath_io *mpio; struct multipath *m = (struct multipath *) ti->private; + if (bio_barrier(bio)) + return -EOPNOTSUPP; + mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); dm_bio_record(&mpio->details, bio); @@ -837,7 +851,7 @@ static int reinstate_path(struct pgpath *pgpath) pgpath->path.is_active = 1; m->current_pgpath = NULL; - if (!m->nr_valid_paths++) + if (!m->nr_valid_paths++ && m->queue_size) queue_work(kmultipathd, &m->process_queued_ios); queue_work(kmultipathd, &m->trigger_event); @@ -963,12 +977,13 @@ void dm_pg_init_complete(struct path *path, unsigned err_flags) bypass_pg(m, pg, 1); spin_lock_irqsave(&m->lock, flags); - if (!err_flags) - m->queue_io = 0; - else { + if (err_flags) { m->current_pgpath = NULL; m->current_pg = NULL; - } + } else if (!m->pg_init_required) + m->queue_io = 0; + + m->pg_init_in_progress = 0; queue_work(kmultipathd, &m->process_queued_ios); spin_unlock_irqrestore(&m->lock, flags); } @@ -988,9 +1003,12 @@ static int do_end_io(struct multipath *m, struct bio *bio, if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio)) return error; + if (error == -EOPNOTSUPP) + return error; + spin_lock(&m->lock); if (!m->nr_valid_paths) { - if (!m->queue_if_no_path || m->suspended) { + if (!m->queue_if_no_path) { spin_unlock(&m->lock); return -EIO; } else { @@ -1051,27 +1069,27 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio, /* * Suspend can't complete until all the I/O is processed so if - * the last path failed we will now error any queued I/O. + * the last path fails we must error any remaining I/O. + * Note that if the freeze_bdev fails while suspending, the + * queue_if_no_path state is lost - userspace should reset it. */ static void multipath_presuspend(struct dm_target *ti) { struct multipath *m = (struct multipath *) ti->private; - unsigned long flags; - spin_lock_irqsave(&m->lock, flags); - m->suspended = 1; - if (m->queue_if_no_path) - queue_work(kmultipathd, &m->process_queued_ios); - spin_unlock_irqrestore(&m->lock, flags); + queue_if_no_path(m, 0); } +/* + * Restore the queue_if_no_path setting. + */ static void multipath_resume(struct dm_target *ti) { struct multipath *m = (struct multipath *) ti->private; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - m->suspended = 0; + m->queue_if_no_path = m->saved_queue_if_no_path; spin_unlock_irqrestore(&m->lock, flags); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6e3cf7e13451..b08df8b9b2ca 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1060,6 +1060,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; + ti->split_io = ms->rh.region_size; r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); if (r) { @@ -1229,7 +1230,7 @@ static int __init dm_mirror_init(void) if (r) return r; - _kmirrord_wq = create_workqueue("kmirrord"); + _kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!_kmirrord_wq) { DMERR("couldn't start kmirrord"); dm_dirty_log_exit(); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7e691ab9a748..ab54f99b7c3b 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -777,7 +777,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, /* Full snapshots are not usable */ if (!s->valid) - return -1; + return -EIO; /* * Write to snapshot - higher level takes care of RW/RO @@ -931,6 +931,10 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) if (!snap->valid) continue; + /* Nothing to do if writing beyond end of snapshot */ + if (bio->bi_sector >= dm_table_get_size(snap->table)) + continue; + down_write(&snap->lock); /* diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 18e9b9953fcd..a6d3baa46f61 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -869,11 +869,17 @@ static void suspend_targets(struct dm_table *t, unsigned postsuspend) void dm_table_presuspend_targets(struct dm_table *t) { + if (!t) + return; + return suspend_targets(t, 0); } void dm_table_postsuspend_targets(struct dm_table *t) { + if (!t) + return; + return suspend_targets(t, 1); } @@ -943,6 +949,7 @@ EXPORT_SYMBOL(dm_vcalloc); EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_put_device); EXPORT_SYMBOL(dm_table_event); +EXPORT_SYMBOL(dm_table_get_size); EXPORT_SYMBOL(dm_table_get_mode); EXPORT_SYMBOL(dm_table_put); EXPORT_SYMBOL(dm_table_get); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f6b03957efc7..d487d9deb98e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -55,10 +55,10 @@ union map_info *dm_get_mapinfo(struct bio *bio) */ #define DMF_BLOCK_IO 0 #define DMF_SUSPENDED 1 -#define DMF_FS_LOCKED 2 struct mapped_device { - struct rw_semaphore lock; + struct rw_semaphore io_lock; + struct semaphore suspend_lock; rwlock_t map_lock; atomic_t holders; @@ -248,16 +248,16 @@ static inline void free_tio(struct mapped_device *md, struct target_io *tio) */ static int queue_io(struct mapped_device *md, struct bio *bio) { - down_write(&md->lock); + down_write(&md->io_lock); if (!test_bit(DMF_BLOCK_IO, &md->flags)) { - up_write(&md->lock); + up_write(&md->io_lock); return 1; } bio_list_add(&md->deferred, bio); - up_write(&md->lock); + up_write(&md->io_lock); return 0; /* deferred successfully */ } @@ -384,7 +384,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* error the io and bail out */ struct dm_io *io = tio->io; free_tio(tio->io->md, tio); - dec_pending(io, -EIO); + dec_pending(io, r); bio_put(clone); } } @@ -568,14 +568,14 @@ static int dm_request(request_queue_t *q, struct bio *bio) int r; struct mapped_device *md = q->queuedata; - down_read(&md->lock); + down_read(&md->io_lock); /* * If we're suspended we have to queue * this io for later. */ while (test_bit(DMF_BLOCK_IO, &md->flags)) { - up_read(&md->lock); + up_read(&md->io_lock); if (bio_rw(bio) == READA) { bio_io_error(bio, bio->bi_size); @@ -594,11 +594,11 @@ static int dm_request(request_queue_t *q, struct bio *bio) * We're in a while loop, because someone could suspend * before we get to the following read lock. */ - down_read(&md->lock); + down_read(&md->io_lock); } __split_bio(md, bio); - up_read(&md->lock); + up_read(&md->io_lock); return 0; } @@ -610,7 +610,7 @@ static int dm_flush_all(request_queue_t *q, struct gendisk *disk, int ret = -ENXIO; if (map) { - ret = dm_table_flush_all(md->map); + ret = dm_table_flush_all(map); dm_table_put(map); } @@ -747,7 +747,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) goto bad1; memset(md, 0, sizeof(*md)); - init_rwsem(&md->lock); + init_rwsem(&md->io_lock); + init_MUTEX(&md->suspend_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->event_nr, 0); @@ -825,18 +826,13 @@ static void event_callback(void *context) wake_up(&md->eventq); } -static void __set_size(struct gendisk *disk, sector_t size) +static void __set_size(struct mapped_device *md, sector_t size) { - struct block_device *bdev; - - set_capacity(disk, size); - bdev = bdget_disk(disk, 0); - if (bdev) { - down(&bdev->bd_inode->i_sem); - i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); - up(&bdev->bd_inode->i_sem); - bdput(bdev); - } + set_capacity(md->disk, size); + + down(&md->frozen_bdev->bd_inode->i_sem); + i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); + up(&md->frozen_bdev->bd_inode->i_sem); } static int __bind(struct mapped_device *md, struct dm_table *t) @@ -845,17 +841,18 @@ static int __bind(struct mapped_device *md, struct dm_table *t) sector_t size; size = dm_table_get_size(t); - __set_size(md->disk, size); + __set_size(md, size); if (size == 0) return 0; + dm_table_get(t); + dm_table_event_callback(t, event_callback, md); + write_lock(&md->map_lock); md->map = t; + dm_table_set_restrictions(t, q); write_unlock(&md->map_lock); - dm_table_get(t); - dm_table_event_callback(md->map, event_callback, md); - dm_table_set_restrictions(t, q); return 0; } @@ -935,7 +932,7 @@ void dm_put(struct mapped_device *md) struct dm_table *map = dm_get_table(md); if (atomic_dec_and_test(&md->holders)) { - if (!test_bit(DMF_SUSPENDED, &md->flags) && map) { + if (!dm_suspended(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } @@ -966,39 +963,33 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c) */ int dm_swap_table(struct mapped_device *md, struct dm_table *table) { - int r; + int r = -EINVAL; - down_write(&md->lock); + down(&md->suspend_lock); /* device must be suspended */ - if (!test_bit(DMF_SUSPENDED, &md->flags)) { - up_write(&md->lock); - return -EPERM; - } + if (!dm_suspended(md)) + goto out; __unbind(md); r = __bind(md, table); - if (r) - return r; - up_write(&md->lock); - return 0; +out: + up(&md->suspend_lock); + return r; } /* * Functions to lock and unlock any filesystem running on the * device. */ -static int __lock_fs(struct mapped_device *md) +static int lock_fs(struct mapped_device *md) { - int error = -ENOMEM; - - if (test_and_set_bit(DMF_FS_LOCKED, &md->flags)) - return 0; + int r = -ENOMEM; md->frozen_bdev = bdget_disk(md->disk, 0); if (!md->frozen_bdev) { - DMWARN("bdget failed in __lock_fs"); + DMWARN("bdget failed in lock_fs"); goto out; } @@ -1006,13 +997,13 @@ static int __lock_fs(struct mapped_device *md) md->frozen_sb = freeze_bdev(md->frozen_bdev); if (IS_ERR(md->frozen_sb)) { - error = PTR_ERR(md->frozen_sb); + r = PTR_ERR(md->frozen_sb); goto out_bdput; } /* don't bdput right now, we don't want the bdev * to go away while it is locked. We'll bdput - * in __unlock_fs + * in unlock_fs */ return 0; @@ -1021,15 +1012,11 @@ out_bdput: md->frozen_sb = NULL; md->frozen_bdev = NULL; out: - clear_bit(DMF_FS_LOCKED, &md->flags); - return error; + return r; } -static void __unlock_fs(struct mapped_device *md) +static void unlock_fs(struct mapped_device *md) { - if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags)) - return; - thaw_bdev(md->frozen_bdev, md->frozen_sb); bdput(md->frozen_bdev); @@ -1046,47 +1033,37 @@ static void __unlock_fs(struct mapped_device *md) */ int dm_suspend(struct mapped_device *md) { - struct dm_table *map; + struct dm_table *map = NULL; DECLARE_WAITQUEUE(wait, current); - int error = -EINVAL; + int r = -EINVAL; - /* Flush I/O to the device. */ - down_read(&md->lock); - if (test_bit(DMF_BLOCK_IO, &md->flags)) - goto out_read_unlock; + down(&md->suspend_lock); - error = __lock_fs(md); - if (error) - goto out_read_unlock; + if (dm_suspended(md)) + goto out; map = dm_get_table(md); - if (map) - dm_table_presuspend_targets(map); - up_read(&md->lock); + /* This does not get reverted if there's an error later. */ + dm_table_presuspend_targets(map); + + /* Flush I/O to the device. */ + r = lock_fs(md); + if (r) + goto out; /* * First we set the BLOCK_IO flag so no more ios will be mapped. - * - * If the flag is already set we know another thread is trying to - * suspend as well, so we leave the fs locked for this thread. */ - error = -EINVAL; - down_write(&md->lock); - if (test_and_set_bit(DMF_BLOCK_IO, &md->flags)) { - if (map) - dm_table_put(map); - goto out_write_unlock; - } + down_write(&md->io_lock); + set_bit(DMF_BLOCK_IO, &md->flags); add_wait_queue(&md->wait, &wait); - up_write(&md->lock); + up_write(&md->io_lock); /* unplug */ - if (map) { + if (map) dm_table_unplug_all(map); - dm_table_put(map); - } /* * Then we wait for the already mapped ios to @@ -1102,63 +1079,67 @@ int dm_suspend(struct mapped_device *md) } set_current_state(TASK_RUNNING); - down_write(&md->lock); + down_write(&md->io_lock); remove_wait_queue(&md->wait, &wait); /* were we interrupted ? */ - error = -EINTR; - if (atomic_read(&md->pending)) - goto out_unfreeze; + r = -EINTR; + if (atomic_read(&md->pending)) { + up_write(&md->io_lock); + unlock_fs(md); + clear_bit(DMF_BLOCK_IO, &md->flags); + goto out; + } + up_write(&md->io_lock); - set_bit(DMF_SUSPENDED, &md->flags); + dm_table_postsuspend_targets(map); - map = dm_get_table(md); - if (map) - dm_table_postsuspend_targets(map); - dm_table_put(map); - up_write(&md->lock); - - return 0; + set_bit(DMF_SUSPENDED, &md->flags); -out_unfreeze: - /* FIXME Undo dm_table_presuspend_targets */ - __unlock_fs(md); - clear_bit(DMF_BLOCK_IO, &md->flags); -out_write_unlock: - up_write(&md->lock); - return error; + r = 0; -out_read_unlock: - up_read(&md->lock); - return error; +out: + dm_table_put(map); + up(&md->suspend_lock); + return r; } int dm_resume(struct mapped_device *md) { + int r = -EINVAL; struct bio *def; - struct dm_table *map = dm_get_table(md); + struct dm_table *map = NULL; - down_write(&md->lock); - if (!map || - !test_bit(DMF_SUSPENDED, &md->flags) || - !dm_table_get_size(map)) { - up_write(&md->lock); - dm_table_put(map); - return -EINVAL; - } + down(&md->suspend_lock); + if (!dm_suspended(md)) + goto out; + + map = dm_get_table(md); + if (!map || !dm_table_get_size(map)) + goto out; dm_table_resume_targets(map); - clear_bit(DMF_SUSPENDED, &md->flags); + + down_write(&md->io_lock); clear_bit(DMF_BLOCK_IO, &md->flags); def = bio_list_get(&md->deferred); __flush_deferred_io(md, def); - up_write(&md->lock); - __unlock_fs(md); + up_write(&md->io_lock); + + unlock_fs(md); + + clear_bit(DMF_SUSPENDED, &md->flags); + dm_table_unplug_all(map); + + r = 0; + +out: dm_table_put(map); + up(&md->suspend_lock); - return 0; + return r; } /*----------------------------------------------------------------- diff --git a/drivers/md/md.c b/drivers/md/md.c index 0c6b5b6baff6..480f658db6f2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -284,7 +284,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) return NULL; } -inline static sector_t calc_dev_sboffset(struct block_device *bdev) +static inline sector_t calc_dev_sboffset(struct block_device *bdev) { sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; return MD_NEW_SIZE_BLOCKS(size); @@ -338,6 +338,7 @@ static int super_written(struct bio *bio, unsigned int bytes_done, int error) if (atomic_dec_and_test(&rdev->mddev->pending_writes)) wake_up(&rdev->mddev->sb_wait); + bio_put(bio); return 0; } @@ -1797,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro) goto out; mddev->ro = 1; } else { + bitmap_flush(mddev); + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); if (mddev->ro) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); @@ -2976,8 +2979,7 @@ static int md_thread(void * arg) wait_event_interruptible_timeout(thread->wqueue, test_bit(THREAD_WAKEUP, &thread->flags), thread->timeout); - if (current->flags & PF_FREEZE) - refrigerator(PF_FREEZE); + try_to_freeze(); clear_bit(THREAD_WAKEUP, &thread->flags); @@ -3484,7 +3486,6 @@ static void md_do_sync(mddev_t *mddev) goto skip; } ITERATE_MDDEV(mddev2,tmp) { - printk("."); if (mddev2 == mddev) continue; if (mddev2->curr_resync && @@ -4007,3 +4008,4 @@ EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(md_check_recovery); MODULE_LICENSE("GPL"); +MODULE_ALIAS("md"); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e11dd14d0b43..2120710172c5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -314,16 +314,16 @@ static int raid0_run (mddev_t *mddev) sector_t space = conf->hash_spacing; int round; conf->preshift = 0; - if (sizeof(sector_t) > sizeof(unsigned long)) { + if (sizeof(sector_t) > sizeof(u32)) { /*shift down space and s so that sector_div will work */ - while (space > (sector_t) (~(unsigned long)0)) { + while (space > (sector_t) (~(u32)0)) { s >>= 1; space >>= 1; s += 1; /* force round-up */ conf->preshift++; } } - round = sector_div(s, (unsigned long)space) ? 1 : 0; + round = sector_div(s, (u32)space) ? 1 : 0; nb_zone = s + round; } printk("raid0 : nb_zone is %d.\n", nb_zone); @@ -443,7 +443,7 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio) volatile #endif sector_t x = block >> conf->preshift; - sector_div(x, (unsigned long)conf->hash_spacing); + sector_div(x, (u32)conf->hash_spacing); zone = conf->hash_table[x]; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff1dbec864af..51d9645ed09c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) if (!uptodate) { md_error(r1_bio->mddev, conf->mirrors[r1_bio->read_disk].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); } else set_bit(R1BIO_Uptodate, &r1_bio->state); rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); @@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) mirror = i; break; } - if (!uptodate) { + if (!uptodate) md_error(mddev, conf->mirrors[mirror].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); - } + update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { @@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int i; int write_targets = 0; int sync_blocks; + int still_degraded = 0; if (!conf->r1buf_pool) { @@ -1126,21 +1125,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i * only be one in raid1 resync. * We can find the current addess in mddev->curr_resync */ - if (!conf->fullsync) { - if (mddev->curr_resync < max_sector) - bitmap_end_sync(mddev->bitmap, - mddev->curr_resync, + if (mddev->curr_resync < max_sector) /* aborted */ + bitmap_end_sync(mddev->bitmap, mddev->curr_resync, &sync_blocks, 1); - bitmap_close_sync(mddev->bitmap); - } - if (mddev->curr_resync >= max_sector) + else /* completed sync */ conf->fullsync = 0; + + bitmap_close_sync(mddev->bitmap); close_sync(conf); return 0; } - if (!conf->fullsync && - !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) { + /* before building a request, check if we can skip these blocks.. + * This call the bitmap_start_sync doesn't actually record anything + */ + if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + !conf->fullsync) { /* We can skip this block, and probably several more */ *skipped = 1; return sync_blocks; @@ -1205,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (i == disk) { bio->bi_rw = READ; bio->bi_end_io = end_sync_read; - } else if (conf->mirrors[i].rdev && - !conf->mirrors[i].rdev->faulty && - (!conf->mirrors[i].rdev->in_sync || - sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) { + } else if (conf->mirrors[i].rdev == NULL || + conf->mirrors[i].rdev->faulty) { + still_degraded = 1; + continue; + } else if (!conf->mirrors[i].rdev->in_sync || + sector_nr + RESYNC_SECTORS > mddev->recovery_cp) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else + /* no need to read or write here */ continue; bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_private = r1_bio; } - if (write_targets + 1 < conf->raid_disks) - /* array degraded, can't clear bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - if (write_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished @@ -1243,15 +1242,15 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i len = (max_sector - sector_nr) << 9; if (len == 0) break; - if (!conf->fullsync) { - if (sync_blocks == 0) { - if (!bitmap_start_sync(mddev->bitmap, - sector_nr, &sync_blocks)) - break; - if (sync_blocks < (PAGE_SIZE>>9)) - BUG(); - if (len > (sync_blocks<<9)) len = sync_blocks<<9; - } + if (sync_blocks == 0) { + if (!bitmap_start_sync(mddev->bitmap, sector_nr, + &sync_blocks, still_degraded) && + !conf->fullsync) + break; + if (sync_blocks < (PAGE_SIZE>>9)) + BUG(); + if (len > (sync_blocks<<9)) + len = sync_blocks<<9; } for (i=0 ; i < conf->raid_disks; i++) { @@ -1264,7 +1263,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i while (i > 0) { i--; bio = r1_bio->bios[i]; - if (bio->bi_end_io==NULL) continue; + if (bio->bi_end_io==NULL) + continue; /* remove last page from this bio */ bio->bi_vcnt--; bio->bi_size -= len; @@ -1469,6 +1469,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = mddev->array_size; + mddev->resync_max_sectors = sectors; return 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 93a9726cc2d6..43f231a467d5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (!conf->chunk_size || conf->chunk_size % 4) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", @@ -1931,6 +1932,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = sectors /2; + mddev->resync_max_sectors = sectors; return 0; } diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index f62ea1a73d0d..495dee1d1e83 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (conf->raid_disks < 4) { printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", @@ -2095,6 +2096,7 @@ static int raid6_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = sectors /2; + mddev->resync_max_sectors = sectors; return 0; } |