Merge /spare/repo/linux-2.6/

author: Jeff Garzik <jgarzik@pobox.com> 2005-08-10 13:46:28 -0400
committer: Jeff Garzik <jgarzik@pobox.com> 2005-08-10 13:46:28 -0400
commit: 2f058256cb64e346f4fb4499ff4e0f1c2791a4b4 (patch)
tree: 91e06602f4d3abb6812ea8c9bc9ba4501e14c84e /drivers/md
parent: 0274aa2506fd2fe89a58dd6cd64d3b3f7b976af8 (diff)
parent: 86b3786078d63242d3194ffc58ae8dae1d1bbef3 (diff)
download: blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.tar.gz
blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.zip
12 files changed, 256 insertions, 226 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 95980ad6b27b..41df4cda66e2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -108,7 +108,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
 {
 	unsigned char *page;
 
-#if INJECT_FAULTS_1
+#ifdef INJECT_FAULTS_1
 	page = NULL;
 #else
 	page = kmalloc(PAGE_SIZE, GFP_NOIO);
@@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap)
 	return 0;
 }
 
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-	unsigned long sectors, int in_sync);
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset);
 /* * bitmap_init_from_disk -- called at bitmap_create time to initialize
  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
  * memory mapping of the bitmap file
@@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
  *   previously kicked from the array, we mark all the bits as
  *   1's in order to cause a full resync.
  */
-static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
+static int bitmap_init_from_disk(struct bitmap *bitmap)
 {
 	unsigned long i, chunks, index, oldindex, bit;
 	struct page *page = NULL, *oldpage = NULL;
@@ -843,7 +842,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
 
 	BUG_ON(!file && !bitmap->offset);
 
-#if INJECT_FAULTS_3
+#ifdef INJECT_FAULTS_3
 	outofdate = 1;
 #else
 	outofdate = bitmap->flags & BITMAP_STALE;
@@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
 		}
 		if (test_bit(bit, page_address(page))) {
 			/* if the disk bit is set, set the memory bit */
-			bitmap_set_memory_bits(bitmap,
-					i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
+			bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap));
 			bit_cnt++;
 		}
 	}
@@ -1187,7 +1185,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
 
 	spin_unlock_irqrestore(&bitmap->lock, flags);
 
-#if INJECT_FATAL_FAULT_2
+#ifdef INJECT_FATAL_FAULT_2
 	daemon = NULL;
 #else
 	sprintf(namebuf, "%%s_%s", name);
@@ -1345,7 +1343,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 	}
 }
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+			int degraded)
 {
 	bitmap_counter_t *bmc;
 	int rv;
@@ -1362,8 +1361,10 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
 			rv = 1;
 		else if (NEEDED(*bmc)) {
 			rv = 1;
-			*bmc |= RESYNC_MASK;
-			*bmc &= ~NEEDED_MASK;
+			if (!degraded) { /* don't set/clear bits if degraded */
+				*bmc |= RESYNC_MASK;
+				*bmc &= ~NEEDED_MASK;
+			}
 		}
 	}
 	spin_unlock_irq(&bitmap->lock);
@@ -1423,35 +1424,53 @@ void bitmap_close_sync(struct bitmap *bitmap)
 	}
 }
 
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-				   unsigned long sectors, int in_sync)
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset)
 {
 	/* For each chunk covered by any of these sectors, set the
-	 * counter to 1 and set resync_needed unless in_sync.  They should all
+	 * counter to 1 and set resync_needed.  They should all
 	 * be 0 at this point
 	 */
-	while (sectors) {
-		int secs;
-		bitmap_counter_t *bmc;
-		spin_lock_irq(&bitmap->lock);
-		bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
-		if (!bmc) {
-			spin_unlock_irq(&bitmap->lock);
-			return;
-		}
-		if (! *bmc) {
-			struct page *page;
-			*bmc = 1 | (in_sync? 0 : NEEDED_MASK);
-			bitmap_count_page(bitmap, offset, 1);
-			page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
-			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
-		}
+
+	int secs;
+	bitmap_counter_t *bmc;
+	spin_lock_irq(&bitmap->lock);
+	bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
+	if (!bmc) {
 		spin_unlock_irq(&bitmap->lock);
-		if (sectors > secs)
-			sectors -= secs;
-		else
-			sectors = 0;
+		return;
+	}
+	if (! *bmc) {
+		struct page *page;
+		*bmc = 1 | NEEDED_MASK;
+		bitmap_count_page(bitmap, offset, 1);
+		page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
+		set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 	}
+	spin_unlock_irq(&bitmap->lock);
+
+}
+
+/*
+ * flush out any pending updates
+ */
+void bitmap_flush(mddev_t *mddev)
+{
+	struct bitmap *bitmap = mddev->bitmap;
+	int sleep;
+
+	if (!bitmap) /* there was no bitmap */
+		return;
+
+	/* run the daemon_work three time to ensure everything is flushed
+	 * that can be
+	 */
+	sleep = bitmap->daemon_sleep;
+	bitmap->daemon_sleep = 0;
+	bitmap_daemon_work(bitmap);
+	bitmap_daemon_work(bitmap);
+	bitmap_daemon_work(bitmap);
+	bitmap->daemon_sleep = sleep;
+	bitmap_update_sb(bitmap);
 }
 
 /*
@@ -1549,7 +1568,7 @@ int bitmap_create(mddev_t *mddev)
 
 	bitmap->syncchunk = ~0UL;
 
-#if INJECT_FATAL_FAULT_1
+#ifdef INJECT_FATAL_FAULT_1
 	bitmap->bp = NULL;
 #else
 	bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
@@ -1562,7 +1581,8 @@ int bitmap_create(mddev_t *mddev)
 
 	/* now that we have some pages available, initialize the in-memory
 	 * bitmap from the on-disk bitmap */
-	err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector);
+	err = bitmap_init_from_disk(bitmap);
+
 	if (err)
 		return err;
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index ee3c869d9701..200a0688f717 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -122,14 +122,6 @@ static struct hash_cell *__get_uuid_cell(const char *str)
 /*-----------------------------------------------------------------
  * Inserting, removing and renaming a device.
  *---------------------------------------------------------------*/
-static inline char *kstrdup(const char *str)
-{
-	char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
-	if (r)
-		strcpy(r, str);
-	return r;
-}
-
 static struct hash_cell *alloc_cell(const char *name, const char *uuid,
 				    struct mapped_device *md)
 {
@@ -139,7 +131,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
 	if (!hc)
 		return NULL;
 
-	hc->name = kstrdup(name);
+	hc->name = kstrdup(name, GFP_KERNEL);
 	if (!hc->name) {
 		kfree(hc);
 		return NULL;
@@ -149,7 +141,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
 		hc->uuid = NULL;
 
 	else {
-		hc->uuid = kstrdup(uuid);
+		hc->uuid = kstrdup(uuid, GFP_KERNEL);
 		if (!hc->uuid) {
 			kfree(hc->name);
 			kfree(hc);
@@ -273,7 +265,7 @@ static int dm_hash_rename(const char *old, const char *new)
 	/*
 	 * duplicate new.
 	 */
-	new_name = kstrdup(new);
+	new_name = kstrdup(new, GFP_KERNEL);
 	if (!new_name)
 		return -ENOMEM;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 0c1b8520ef86..785806bdb248 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -63,6 +63,7 @@ struct multipath {
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 	unsigned pg_init_required;	/* pg_init needs calling? */
+	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
 
 	unsigned nr_valid_paths;	/* Total number of usable paths */
 	struct pgpath *current_pgpath;
@@ -72,7 +73,7 @@ struct multipath {
 
 	unsigned queue_io;		/* Must we queue all I/O? */
 	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
-	unsigned suspended;		/* Has dm core suspended our I/O? */
+	unsigned saved_queue_if_no_path;/* Saved state during suspension */
 
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
@@ -304,11 +305,12 @@ static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
 		m->queue_size--;
 
 	if ((pgpath && m->queue_io) ||
-	    (!pgpath && m->queue_if_no_path && !m->suspended)) {
+	    (!pgpath && m->queue_if_no_path)) {
 		/* Queue for the daemon to resubmit */
 		bio_list_add(&m->queued_ios, bio);
 		m->queue_size++;
-		if (m->pg_init_required || !m->queue_io)
+		if ((m->pg_init_required && !m->pg_init_in_progress) ||
+		    !m->queue_io)
 			queue_work(kmultipathd, &m->process_queued_ios);
 		pgpath = NULL;
 		r = 0;
@@ -333,8 +335,9 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path)
 
 	spin_lock_irqsave(&m->lock, flags);
 
+	m->saved_queue_if_no_path = m->queue_if_no_path;
 	m->queue_if_no_path = queue_if_no_path;
-	if (!m->queue_if_no_path)
+	if (!m->queue_if_no_path && m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -379,25 +382,31 @@ static void process_queued_ios(void *data)
 {
 	struct multipath *m = (struct multipath *) data;
 	struct hw_handler *hwh = &m->hw_handler;
-	struct pgpath *pgpath;
-	unsigned init_required, must_queue = 0;
+	struct pgpath *pgpath = NULL;
+	unsigned init_required = 0, must_queue = 1;
 	unsigned long flags;
 
 	spin_lock_irqsave(&m->lock, flags);
 
+	if (!m->queue_size)
+		goto out;
+
 	if (!m->current_pgpath)
 		__choose_pgpath(m);
 
 	pgpath = m->current_pgpath;
 
-	if ((pgpath && m->queue_io) ||
-	    (!pgpath && m->queue_if_no_path && !m->suspended))
-		must_queue = 1;
+	if ((pgpath && !m->queue_io) ||
+	    (!pgpath && !m->queue_if_no_path))
+		must_queue = 0;
 
-	init_required = m->pg_init_required;
-	if (init_required)
+	if (m->pg_init_required && !m->pg_init_in_progress) {
 		m->pg_init_required = 0;
+		m->pg_init_in_progress = 1;
+		init_required = 1;
+	}
 
+out:
 	spin_unlock_irqrestore(&m->lock, flags);
 
 	if (init_required)
@@ -752,6 +761,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 static void multipath_dtr(struct dm_target *ti)
 {
 	struct multipath *m = (struct multipath *) ti->private;
+
+	flush_workqueue(kmultipathd);
 	free_multipath(m);
 }
 
@@ -765,6 +776,9 @@ static int multipath_map(struct dm_target *ti, struct bio *bio,
 	struct mpath_io *mpio;
 	struct multipath *m = (struct multipath *) ti->private;
 
+	if (bio_barrier(bio))
+		return -EOPNOTSUPP;
+
 	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
 	dm_bio_record(&mpio->details, bio);
 
@@ -837,7 +851,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	pgpath->path.is_active = 1;
 
 	m->current_pgpath = NULL;
-	if (!m->nr_valid_paths++)
+	if (!m->nr_valid_paths++ && m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
 	queue_work(kmultipathd, &m->trigger_event);
@@ -963,12 +977,13 @@ void dm_pg_init_complete(struct path *path, unsigned err_flags)
 		bypass_pg(m, pg, 1);
 
 	spin_lock_irqsave(&m->lock, flags);
-	if (!err_flags)
-		m->queue_io = 0;
-	else {
+	if (err_flags) {
 		m->current_pgpath = NULL;
 		m->current_pg = NULL;
-	}
+	} else if (!m->pg_init_required)
+		m->queue_io = 0;
+
+	m->pg_init_in_progress = 0;
 	queue_work(kmultipathd, &m->process_queued_ios);
 	spin_unlock_irqrestore(&m->lock, flags);
 }
@@ -988,9 +1003,12 @@ static int do_end_io(struct multipath *m, struct bio *bio,
 	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
 		return error;
 
+	if (error == -EOPNOTSUPP)
+		return error;
+
 	spin_lock(&m->lock);
 	if (!m->nr_valid_paths) {
-		if (!m->queue_if_no_path || m->suspended) {
+		if (!m->queue_if_no_path) {
 			spin_unlock(&m->lock);
 			return -EIO;
 		} else {
@@ -1051,27 +1069,27 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
 
 /*
  * Suspend can't complete until all the I/O is processed so if
- * the last path failed we will now error any queued I/O.
+ * the last path fails we must error any remaining I/O.
+ * Note that if the freeze_bdev fails while suspending, the
+ * queue_if_no_path state is lost - userspace should reset it.
  */
 static void multipath_presuspend(struct dm_target *ti)
 {
 	struct multipath *m = (struct multipath *) ti->private;
-	unsigned long flags;
 
-	spin_lock_irqsave(&m->lock, flags);
-	m->suspended = 1;
-	if (m->queue_if_no_path)
-		queue_work(kmultipathd, &m->process_queued_ios);
-	spin_unlock_irqrestore(&m->lock, flags);
+	queue_if_no_path(m, 0);
 }
 
+/*
+ * Restore the queue_if_no_path setting.
+ */
 static void multipath_resume(struct dm_target *ti)
 {
 	struct multipath *m = (struct multipath *) ti->private;
 	unsigned long flags;
 
 	spin_lock_irqsave(&m->lock, flags);
-	m->suspended = 0;
+	m->queue_if_no_path = m->saved_queue_if_no_path;
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6e3cf7e13451..b08df8b9b2ca 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1060,6 +1060,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = ms;
+ 	ti->split_io = ms->rh.region_size;
 
 	r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
 	if (r) {
@@ -1229,7 +1230,7 @@ static int __init dm_mirror_init(void)
 	if (r)
 		return r;
 
-	_kmirrord_wq = create_workqueue("kmirrord");
+	_kmirrord_wq = create_singlethread_workqueue("kmirrord");
 	if (!_kmirrord_wq) {
 		DMERR("couldn't start kmirrord");
 		dm_dirty_log_exit();
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7e691ab9a748..ab54f99b7c3b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -777,7 +777,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 
 	/* Full snapshots are not usable */
 	if (!s->valid)
-		return -1;
+		return -EIO;
 
 	/*
 	 * Write to snapshot - higher level takes care of RW/RO
@@ -931,6 +931,10 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 		if (!snap->valid)
 			continue;
 
+		/* Nothing to do if writing beyond end of snapshot */
+		if (bio->bi_sector >= dm_table_get_size(snap->table))
+			continue;
+
 		down_write(&snap->lock);
 
 		/*
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 18e9b9953fcd..a6d3baa46f61 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -869,11 +869,17 @@ static void suspend_targets(struct dm_table *t, unsigned postsuspend)
 
 void dm_table_presuspend_targets(struct dm_table *t)
 {
+	if (!t)
+		return;
+
 	return suspend_targets(t, 0);
 }
 
 void dm_table_postsuspend_targets(struct dm_table *t)
 {
+	if (!t)
+		return;
+
 	return suspend_targets(t, 1);
 }
 
@@ -943,6 +949,7 @@ EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
 EXPORT_SYMBOL(dm_table_event);
+EXPORT_SYMBOL(dm_table_get_size);
 EXPORT_SYMBOL(dm_table_get_mode);
 EXPORT_SYMBOL(dm_table_put);
 EXPORT_SYMBOL(dm_table_get);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f6b03957efc7..d487d9deb98e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -55,10 +55,10 @@ union map_info *dm_get_mapinfo(struct bio *bio)
  */
 #define DMF_BLOCK_IO 0
 #define DMF_SUSPENDED 1
-#define DMF_FS_LOCKED 2
 
 struct mapped_device {
-	struct rw_semaphore lock;
+	struct rw_semaphore io_lock;
+	struct semaphore suspend_lock;
 	rwlock_t map_lock;
 	atomic_t holders;
 
@@ -248,16 +248,16 @@ static inline void free_tio(struct mapped_device *md, struct target_io *tio)
  */
 static int queue_io(struct mapped_device *md, struct bio *bio)
 {
-	down_write(&md->lock);
+	down_write(&md->io_lock);
 
 	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
-		up_write(&md->lock);
+		up_write(&md->io_lock);
 		return 1;
 	}
 
 	bio_list_add(&md->deferred, bio);
 
-	up_write(&md->lock);
+	up_write(&md->io_lock);
 	return 0;		/* deferred successfully */
 }
 
@@ -384,7 +384,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 		/* error the io and bail out */
 		struct dm_io *io = tio->io;
 		free_tio(tio->io->md, tio);
-		dec_pending(io, -EIO);
+		dec_pending(io, r);
 		bio_put(clone);
 	}
 }
@@ -568,14 +568,14 @@ static int dm_request(request_queue_t *q, struct bio *bio)
 	int r;
 	struct mapped_device *md = q->queuedata;
 
-	down_read(&md->lock);
+	down_read(&md->io_lock);
 
 	/*
 	 * If we're suspended we have to queue
 	 * this io for later.
 	 */
 	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
-		up_read(&md->lock);
+		up_read(&md->io_lock);
 
 		if (bio_rw(bio) == READA) {
 			bio_io_error(bio, bio->bi_size);
@@ -594,11 +594,11 @@ static int dm_request(request_queue_t *q, struct bio *bio)
 		 * We're in a while loop, because someone could suspend
 		 * before we get to the following read lock.
 		 */
-		down_read(&md->lock);
+		down_read(&md->io_lock);
 	}
 
 	__split_bio(md, bio);
-	up_read(&md->lock);
+	up_read(&md->io_lock);
 	return 0;
 }
 
@@ -610,7 +610,7 @@ static int dm_flush_all(request_queue_t *q, struct gendisk *disk,
 	int ret = -ENXIO;
 
 	if (map) {
-		ret = dm_table_flush_all(md->map);
+		ret = dm_table_flush_all(map);
 		dm_table_put(map);
 	}
 
@@ -747,7 +747,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
 		goto bad1;
 
 	memset(md, 0, sizeof(*md));
-	init_rwsem(&md->lock);
+	init_rwsem(&md->io_lock);
+	init_MUTEX(&md->suspend_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->event_nr, 0);
@@ -825,18 +826,13 @@ static void event_callback(void *context)
 	wake_up(&md->eventq);
 }
 
-static void __set_size(struct gendisk *disk, sector_t size)
+static void __set_size(struct mapped_device *md, sector_t size)
 {
-	struct block_device *bdev;
-
-	set_capacity(disk, size);
-	bdev = bdget_disk(disk, 0);
-	if (bdev) {
-		down(&bdev->bd_inode->i_sem);
-		i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
-		up(&bdev->bd_inode->i_sem);
-		bdput(bdev);
-	}
+	set_capacity(md->disk, size);
+
+	down(&md->frozen_bdev->bd_inode->i_sem);
+	i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+	up(&md->frozen_bdev->bd_inode->i_sem);
 }
 
 static int __bind(struct mapped_device *md, struct dm_table *t)
@@ -845,17 +841,18 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 	sector_t size;
 
 	size = dm_table_get_size(t);
-	__set_size(md->disk, size);
+	__set_size(md, size);
 	if (size == 0)
 		return 0;
 
+	dm_table_get(t);
+	dm_table_event_callback(t, event_callback, md);
+
 	write_lock(&md->map_lock);
 	md->map = t;
+	dm_table_set_restrictions(t, q);
 	write_unlock(&md->map_lock);
 
-	dm_table_get(t);
-	dm_table_event_callback(md->map, event_callback, md);
-	dm_table_set_restrictions(t, q);
 	return 0;
 }
 
@@ -935,7 +932,7 @@ void dm_put(struct mapped_device *md)
 	struct dm_table *map = dm_get_table(md);
 
 	if (atomic_dec_and_test(&md->holders)) {
-		if (!test_bit(DMF_SUSPENDED, &md->flags) && map) {
+		if (!dm_suspended(md)) {
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
@@ -966,39 +963,33 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
  */
 int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	int r;
+	int r = -EINVAL;
 
-	down_write(&md->lock);
+	down(&md->suspend_lock);
 
 	/* device must be suspended */
-	if (!test_bit(DMF_SUSPENDED, &md->flags)) {
-		up_write(&md->lock);
-		return -EPERM;
-	}
+	if (!dm_suspended(md))
+		goto out;
 
 	__unbind(md);
 	r = __bind(md, table);
-	if (r)
-		return r;
 
-	up_write(&md->lock);
-	return 0;
+out:
+	up(&md->suspend_lock);
+	return r;
 }
 
 /*
  * Functions to lock and unlock any filesystem running on the
  * device.
  */
-static int __lock_fs(struct mapped_device *md)
+static int lock_fs(struct mapped_device *md)
 {
-	int error = -ENOMEM;
-
-	if (test_and_set_bit(DMF_FS_LOCKED, &md->flags))
-		return 0;
+	int r = -ENOMEM;
 
 	md->frozen_bdev = bdget_disk(md->disk, 0);
 	if (!md->frozen_bdev) {
-		DMWARN("bdget failed in __lock_fs");
+		DMWARN("bdget failed in lock_fs");
 		goto out;
 	}
 
@@ -1006,13 +997,13 @@ static int __lock_fs(struct mapped_device *md)
 
 	md->frozen_sb = freeze_bdev(md->frozen_bdev);
 	if (IS_ERR(md->frozen_sb)) {
-		error = PTR_ERR(md->frozen_sb);
+		r = PTR_ERR(md->frozen_sb);
 		goto out_bdput;
 	}
 
 	/* don't bdput right now, we don't want the bdev
 	 * to go away while it is locked.  We'll bdput
-	 * in __unlock_fs
+	 * in unlock_fs
 	 */
 	return 0;
 
@@ -1021,15 +1012,11 @@ out_bdput:
 	md->frozen_sb = NULL;
 	md->frozen_bdev = NULL;
 out:
-	clear_bit(DMF_FS_LOCKED, &md->flags);
-	return error;
+	return r;
 }
 
-static void __unlock_fs(struct mapped_device *md)
+static void unlock_fs(struct mapped_device *md)
 {
-	if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags))
-		return;
-
 	thaw_bdev(md->frozen_bdev, md->frozen_sb);
 	bdput(md->frozen_bdev);
 
@@ -1046,47 +1033,37 @@ static void __unlock_fs(struct mapped_device *md)
  */
 int dm_suspend(struct mapped_device *md)
 {
-	struct dm_table *map;
+	struct dm_table *map = NULL;
 	DECLARE_WAITQUEUE(wait, current);
-	int error = -EINVAL;
+	int r = -EINVAL;
 
-	/* Flush I/O to the device. */
-	down_read(&md->lock);
-	if (test_bit(DMF_BLOCK_IO, &md->flags))
-		goto out_read_unlock;
+	down(&md->suspend_lock);
 
-	error = __lock_fs(md);
-	if (error)
-		goto out_read_unlock;
+	if (dm_suspended(md))
+		goto out;
 
 	map = dm_get_table(md);
-	if (map)
-		dm_table_presuspend_targets(map);
 
-	up_read(&md->lock);
+	/* This does not get reverted if there's an error later. */
+	dm_table_presuspend_targets(map);
+
+	/* Flush I/O to the device. */
+	r = lock_fs(md);
+	if (r)
+		goto out;
 
 	/*
 	 * First we set the BLOCK_IO flag so no more ios will be mapped.
-	 *
-	 * If the flag is already set we know another thread is trying to
-	 * suspend as well, so we leave the fs locked for this thread.
 	 */
-	error = -EINVAL;
-	down_write(&md->lock);
-	if (test_and_set_bit(DMF_BLOCK_IO, &md->flags)) {
-		if (map)
-			dm_table_put(map);
-		goto out_write_unlock;
-	}
+	down_write(&md->io_lock);
+	set_bit(DMF_BLOCK_IO, &md->flags);
 
 	add_wait_queue(&md->wait, &wait);
-	up_write(&md->lock);
+	up_write(&md->io_lock);
 
 	/* unplug */
-	if (map) {
+	if (map)
 		dm_table_unplug_all(map);
-		dm_table_put(map);
-	}
 
 	/*
 	 * Then we wait for the already mapped ios to
@@ -1102,63 +1079,67 @@ int dm_suspend(struct mapped_device *md)
 	}
 	set_current_state(TASK_RUNNING);
 
-	down_write(&md->lock);
+	down_write(&md->io_lock);
 	remove_wait_queue(&md->wait, &wait);
 
 	/* were we interrupted ? */
-	error = -EINTR;
-	if (atomic_read(&md->pending))
-		goto out_unfreeze;
+	r = -EINTR;
+	if (atomic_read(&md->pending)) {
+		up_write(&md->io_lock);
+		unlock_fs(md);
+		clear_bit(DMF_BLOCK_IO, &md->flags);
+		goto out;
+	}
+	up_write(&md->io_lock);
 
-	set_bit(DMF_SUSPENDED, &md->flags);
+	dm_table_postsuspend_targets(map);
 
-	map = dm_get_table(md);
-	if (map)
-		dm_table_postsuspend_targets(map);
-	dm_table_put(map);
-	up_write(&md->lock);
-
-	return 0;
+	set_bit(DMF_SUSPENDED, &md->flags);
 
-out_unfreeze:
-	/* FIXME Undo dm_table_presuspend_targets */
-	__unlock_fs(md);
-	clear_bit(DMF_BLOCK_IO, &md->flags);
-out_write_unlock:
-	up_write(&md->lock);
-	return error;
+	r = 0;
 
-out_read_unlock:
-	up_read(&md->lock);
-	return error;
+out:
+	dm_table_put(map);
+	up(&md->suspend_lock);
+	return r;
 }
 
 int dm_resume(struct mapped_device *md)
 {
+	int r = -EINVAL;
 	struct bio *def;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = NULL;
 
-	down_write(&md->lock);
-	if (!map ||
-	    !test_bit(DMF_SUSPENDED, &md->flags) ||
-	    !dm_table_get_size(map)) {
-		up_write(&md->lock);
-		dm_table_put(map);
-		return -EINVAL;
-	}
+	down(&md->suspend_lock);
+	if (!dm_suspended(md))
+		goto out;
+
+	map = dm_get_table(md);
+	if (!map || !dm_table_get_size(map))
+		goto out;
 
 	dm_table_resume_targets(map);
-	clear_bit(DMF_SUSPENDED, &md->flags);
+
+	down_write(&md->io_lock);
 	clear_bit(DMF_BLOCK_IO, &md->flags);
 
 	def = bio_list_get(&md->deferred);
 	__flush_deferred_io(md, def);
-	up_write(&md->lock);
-	__unlock_fs(md);
+	up_write(&md->io_lock);
+
+	unlock_fs(md);
+
+	clear_bit(DMF_SUSPENDED, &md->flags);
+
 	dm_table_unplug_all(map);
+
+	r = 0;
+
+out:
 	dm_table_put(map);
+	up(&md->suspend_lock);
 
-	return 0;
+	return r;
 }
 
 /*-----------------------------------------------------------------
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c6b5b6baff6..480f658db6f2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -284,7 +284,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 	return NULL;
 }
 
-inline static sector_t calc_dev_sboffset(struct block_device *bdev)
+static inline sector_t calc_dev_sboffset(struct block_device *bdev)
 {
 	sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	return MD_NEW_SIZE_BLOCKS(size);
@@ -338,6 +338,7 @@ static int super_written(struct bio *bio, unsigned int bytes_done, int error)
 
 	if (atomic_dec_and_test(&rdev->mddev->pending_writes))
 		wake_up(&rdev->mddev->sb_wait);
+	bio_put(bio);
 	return 0;
 }
 
@@ -1797,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
 				goto out;
 			mddev->ro = 1;
 		} else {
+			bitmap_flush(mddev);
+			wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
 			if (mddev->ro)
 				set_disk_ro(disk, 0);
 			blk_queue_make_request(mddev->queue, md_fail_request);
@@ -2976,8 +2979,7 @@ static int md_thread(void * arg)
 		wait_event_interruptible_timeout(thread->wqueue,
 						 test_bit(THREAD_WAKEUP, &thread->flags),
 						 thread->timeout);
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		try_to_freeze();
 
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 
@@ -3484,7 +3486,6 @@ static void md_do_sync(mddev_t *mddev)
 			goto skip;
 		}
 		ITERATE_MDDEV(mddev2,tmp) {
-			printk(".");
 			if (mddev2 == mddev)
 				continue;
 			if (mddev2->curr_resync && 
@@ -4007,3 +4008,4 @@ EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("md");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e11dd14d0b43..2120710172c5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -314,16 +314,16 @@ static int raid0_run (mddev_t *mddev)
 		sector_t space = conf->hash_spacing;
 		int round;
 		conf->preshift = 0;
-		if (sizeof(sector_t) > sizeof(unsigned long)) {
+		if (sizeof(sector_t) > sizeof(u32)) {
 			/*shift down space and s so that sector_div will work */
-			while (space > (sector_t) (~(unsigned long)0)) {
+			while (space > (sector_t) (~(u32)0)) {
 				s >>= 1;
 				space >>= 1;
 				s += 1; /* force round-up */
 				conf->preshift++;
 			}
 		}
-		round = sector_div(s, (unsigned long)space) ? 1 : 0;
+		round = sector_div(s, (u32)space) ? 1 : 0;
 		nb_zone = s + round;
 	}
 	printk("raid0 : nb_zone is %d.\n", nb_zone);
@@ -443,7 +443,7 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
 		volatile
 #endif
 		sector_t x = block >> conf->preshift;
-		sector_div(x, (unsigned long)conf->hash_spacing);
+		sector_div(x, (u32)conf->hash_spacing);
 		zone = conf->hash_table[x];
 	}
  
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff1dbec864af..51d9645ed09c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	if (!uptodate) {
 		md_error(r1_bio->mddev,
 			 conf->mirrors[r1_bio->read_disk].rdev);
-		set_bit(R1BIO_Degraded, &r1_bio->state);
 	} else
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 	rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
@@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 			mirror = i;
 			break;
 		}
-	if (!uptodate) {
+	if (!uptodate)
 		md_error(mddev, conf->mirrors[mirror].rdev);
-		set_bit(R1BIO_Degraded, &r1_bio->state);
-	}
+
 	update_head_pos(mirror, r1_bio);
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int i;
 	int write_targets = 0;
 	int sync_blocks;
+	int still_degraded = 0;
 
 	if (!conf->r1buf_pool)
 	{
@@ -1126,21 +1125,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		 * only be one in raid1 resync.
 		 * We can find the current addess in mddev->curr_resync
 		 */
-		if (!conf->fullsync) {
-			if (mddev->curr_resync < max_sector)
-				bitmap_end_sync(mddev->bitmap,
-						mddev->curr_resync,
+		if (mddev->curr_resync < max_sector) /* aborted */
+			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
 						&sync_blocks, 1);
-			bitmap_close_sync(mddev->bitmap);
-		}
-		if (mddev->curr_resync >= max_sector)
+		else /* completed sync */
 			conf->fullsync = 0;
+
+		bitmap_close_sync(mddev->bitmap);
 		close_sync(conf);
 		return 0;
 	}
 
-	if (!conf->fullsync &&
-	    !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
+	/* before building a request, check if we can skip these blocks..
+	 * This call the bitmap_start_sync doesn't actually record anything
+	 */
+	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+	    !conf->fullsync) {
 		/* We can skip this block, and probably several more */
 		*skipped = 1;
 		return sync_blocks;
@@ -1205,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		if (i == disk) {
 			bio->bi_rw = READ;
 			bio->bi_end_io = end_sync_read;
-		} else if (conf->mirrors[i].rdev &&
-			   !conf->mirrors[i].rdev->faulty &&
-			   (!conf->mirrors[i].rdev->in_sync ||
-			    sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) {
+		} else if (conf->mirrors[i].rdev == NULL ||
+			   conf->mirrors[i].rdev->faulty) {
+			still_degraded = 1;
+			continue;
+		} else if (!conf->mirrors[i].rdev->in_sync ||
+			   sector_nr + RESYNC_SECTORS > mddev->recovery_cp) {
 			bio->bi_rw = WRITE;
 			bio->bi_end_io = end_sync_write;
 			write_targets ++;
 		} else
+			/* no need to read or write here */
 			continue;
 		bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
 		bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		bio->bi_private = r1_bio;
 	}
 
-	if (write_targets + 1 < conf->raid_disks)
-		/* array degraded, can't clear bitmap */
-		set_bit(R1BIO_Degraded, &r1_bio->state);
-
 	if (write_targets == 0) {
 		/* There is nowhere to write, so all non-sync
 		 * drives must be failed - so we are finished
@@ -1243,15 +1242,15 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
 			break;
-		if (!conf->fullsync) {
-			if (sync_blocks == 0) {
-				if (!bitmap_start_sync(mddev->bitmap,
-						       sector_nr, &sync_blocks))
-					break;
-				if (sync_blocks < (PAGE_SIZE>>9))
-					BUG();
-				if (len > (sync_blocks<<9)) len = sync_blocks<<9;
-			}
+		if (sync_blocks == 0) {
+			if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+					&sync_blocks, still_degraded) &&
+					!conf->fullsync)
+				break;
+			if (sync_blocks < (PAGE_SIZE>>9))
+				BUG();
+			if (len > (sync_blocks<<9))
+				len = sync_blocks<<9;
 		}
 
 		for (i=0 ; i < conf->raid_disks; i++) {
@@ -1264,7 +1263,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 					while (i > 0) {
 						i--;
 						bio = r1_bio->bios[i];
-						if (bio->bi_end_io==NULL) continue;
+						if (bio->bi_end_io==NULL)
+							continue;
 						/* remove last page from this bio */
 						bio->bi_vcnt--;
 						bio->bi_size -= len;
@@ -1469,6 +1469,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}
 	mddev->size = mddev->array_size;
+	mddev->resync_max_sectors = sectors;
 	return 0;
 }
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 93a9726cc2d6..43f231a467d5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev)
 
 	/* device size must be a multiple of chunk size */
 	mddev->size &= ~(mddev->chunk_size/1024 -1);
+	mddev->resync_max_sectors = mddev->size << 1;
 
 	if (!conf->chunk_size || conf->chunk_size % 4) {
 		printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
@@ -1931,6 +1932,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}
 	mddev->size = sectors /2;
+	mddev->resync_max_sectors = sectors;
 	return 0;
 }
 
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index f62ea1a73d0d..495dee1d1e83 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev)
 
 	/* device size must be a multiple of chunk size */
 	mddev->size &= ~(mddev->chunk_size/1024 -1);
+	mddev->resync_max_sectors = mddev->size << 1;
 
 	if (conf->raid_disks < 4) {
 		printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
@@ -2095,6 +2096,7 @@ static int raid6_resize(mddev_t *mddev, sector_t sectors)
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}
 	mddev->size = sectors /2;
+	mddev->resync_max_sectors = sectors;
 	return 0;
 }
author	Jeff Garzik <jgarzik@pobox.com>	2005-08-10 13:46:28 -0400
committer	Jeff Garzik <jgarzik@pobox.com>	2005-08-10 13:46:28 -0400
commit	2f058256cb64e346f4fb4499ff4e0f1c2791a4b4 (patch)
tree	91e06602f4d3abb6812ea8c9bc9ba4501e14c84e /drivers/md
parent	0274aa2506fd2fe89a58dd6cd64d3b3f7b976af8 (diff)
parent	86b3786078d63242d3194ffc58ae8dae1d1bbef3 (diff)
download	blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.tar.gz blackbird-op-linux-2f058256cb64e346f4fb4499ff4e0f1c2791a4b4.zip