diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-exception-store.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.h | 4 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-base.c | 39 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c | 8 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.h | 2 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 8 | ||||
-rw-r--r-- | drivers/md/dm-snap-persistent.c | 88 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 51 | ||||
-rw-r--r-- | drivers/md/dm.c | 15 | ||||
-rw-r--r-- | drivers/md/md.c | 33 | ||||
-rw-r--r-- | drivers/md/md.h | 10 | ||||
-rw-r--r-- | drivers/md/raid5.c | 34 |
14 files changed, 240 insertions, 101 deletions
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 3710ff88fc10..556acff3952f 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -171,6 +171,14 @@ static int set_chunk_size(struct dm_exception_store *store, */ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); + return dm_exception_store_set_chunk_size(store, chunk_size_ulong, + error); +} + +int dm_exception_store_set_chunk_size(struct dm_exception_store *store, + unsigned long chunk_size_ulong, + char **error) +{ /* Check chunk_size is a power of 2 */ if (!is_power_of_2(chunk_size_ulong)) { *error = "Chunk size is not a power of 2"; @@ -183,6 +191,11 @@ static int set_chunk_size(struct dm_exception_store *store, return -EINVAL; } + if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) { + *error = "Chunk size is too high"; + return -EINVAL; + } + store->chunk_size = chunk_size_ulong; store->chunk_mask = chunk_size_ulong - 1; store->chunk_shift = ffs(chunk_size_ulong) - 1; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 2442c8c07898..812c71872ba0 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -168,6 +168,10 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store, int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); +int dm_exception_store_set_chunk_size(struct dm_exception_store *store, + unsigned long chunk_size_ulong, + char **error); + int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, unsigned *args_used, struct dm_exception_store **store); diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index e69b96560997..6e186b1a062d 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -21,6 +21,7 @@ struct log_c { struct dm_target *ti; uint32_t region_size; region_t region_count; + uint64_t luid; char uuid[DM_UUID_LEN]; char *usr_argv_str; @@ -63,7 +64,7 @@ static int userspace_do_request(struct log_c *lc, const char *uuid, * restored. */ retry: - r = dm_consult_userspace(uuid, request_type, data, + r = dm_consult_userspace(uuid, lc->luid, request_type, data, data_size, rdata, rdata_size); if (r != -ESRCH) @@ -74,14 +75,15 @@ retry: set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(2*HZ); DMWARN("Attempting to contact userspace log server..."); - r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, + r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, + lc->usr_argv_str, strlen(lc->usr_argv_str) + 1, NULL, NULL); if (!r) break; } DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); - r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, + r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, 0, NULL, NULL); if (!r) goto retry; @@ -111,10 +113,9 @@ static int build_constructor_string(struct dm_target *ti, return -ENOMEM; } - for (i = 0, str_size = 0; i < argc; i++) - str_size += sprintf(str + str_size, "%s ", argv[i]); - str_size += sprintf(str + str_size, "%llu", - (unsigned long long)ti->len); + str_size = sprintf(str, "%llu", (unsigned long long)ti->len); + for (i = 0; i < argc; i++) + str_size += sprintf(str + str_size, " %s", argv[i]); *ctr_str = str; return str_size; @@ -154,6 +155,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, return -ENOMEM; } + /* The ptr value is sufficient for local unique id */ + lc->luid = (uint64_t)lc; + lc->ti = ti; if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { @@ -173,7 +177,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, } /* Send table string */ - r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, ctr_str, str_size, NULL, NULL); if (r == -ESRCH) { @@ -183,7 +187,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, /* Since the region size does not change, get it now */ rdata_size = sizeof(rdata); - r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, NULL, 0, (char *)&rdata, &rdata_size); if (r) { @@ -212,7 +216,7 @@ static void userspace_dtr(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, NULL, 0, NULL, NULL); @@ -227,7 +231,7 @@ static int userspace_presuspend(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, NULL, 0, NULL, NULL); @@ -239,7 +243,7 @@ static int userspace_postsuspend(struct dm_dirty_log *log) int r; struct log_c *lc = log->context; - r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, NULL, 0, NULL, NULL); @@ -252,7 +256,7 @@ static int userspace_resume(struct dm_dirty_log *log) struct log_c *lc = log->context; lc->in_sync_hint = 0; - r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, + r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, NULL, 0, NULL, NULL); @@ -561,6 +565,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, char *result, unsigned maxlen) { int r = 0; + char *table_args; size_t sz = (size_t)maxlen; struct log_c *lc = log->context; @@ -577,8 +582,12 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, break; case STATUSTYPE_TABLE: sz = 0; - DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1, - lc->uuid, lc->usr_argv_str); + table_args = strstr(lc->usr_argv_str, " "); + BUG_ON(!table_args); /* There will always be a ' ' */ + table_args++; + + DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, + lc->uuid, table_args); break; } return (r) ? 0 : (int)sz; diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 0ca1ee768a1f..ba0edad2d048 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -108,7 +108,7 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) *(pkg->data_size) = 0; } else if (tfr->data_size > *(pkg->data_size)) { DMERR("Insufficient space to receive package [%u] " - "(%u vs %lu)", tfr->request_type, + "(%u vs %zu)", tfr->request_type, tfr->data_size, *(pkg->data_size)); *(pkg->data_size) = 0; @@ -147,7 +147,8 @@ static void cn_ulog_callback(void *data) /** * dm_consult_userspace - * @uuid: log's uuid (must be DM_UUID_LEN in size) + * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size) + * @luid: log's local unique identifier * @request_type: found in include/linux/dm-log-userspace.h * @data: data to tx to the server * @data_size: size of data in bytes @@ -163,7 +164,7 @@ static void cn_ulog_callback(void *data) * * Returns: 0 on success, -EXXX on failure **/ -int dm_consult_userspace(const char *uuid, int request_type, +int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, char *data, size_t data_size, char *rdata, size_t *rdata_size) { @@ -190,6 +191,7 @@ resend: memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); memcpy(tfr->uuid, uuid, DM_UUID_LEN); + tfr->luid = luid; tfr->seq = dm_ulog_seq++; /* diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h index c26d8e4e2710..04ee874f9153 100644 --- a/drivers/md/dm-log-userspace-transfer.h +++ b/drivers/md/dm-log-userspace-transfer.h @@ -11,7 +11,7 @@ int dm_ulog_tfr_init(void); void dm_ulog_tfr_exit(void); -int dm_consult_userspace(const char *uuid, int request_type, +int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, char *data, size_t data_size, char *rdata, size_t *rdata_size); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9726577cde49..33f179e66bf5 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -648,7 +648,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) */ dm_rh_inc_pending(ms->rh, &sync); dm_rh_inc_pending(ms->rh, &nosync); - ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0; + + /* + * If the flush fails on a previous call and succeeds here, + * we must not reset the log_failure variable. We need + * userspace interaction to do that. + */ + ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure; /* * Dispatch io. diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 6e3fe4f14934..d5b2e08750d5 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -106,6 +106,13 @@ struct pstore { void *zero_area; /* + * An area used for header. The header can be written + * concurrently with metadata (when invalidating the snapshot), + * so it needs a separate buffer. + */ + void *header_area; + + /* * Used to keep track of which metadata area the data in * 'chunk' refers to. */ @@ -148,16 +155,27 @@ static int alloc_area(struct pstore *ps) */ ps->area = vmalloc(len); if (!ps->area) - return r; + goto err_area; ps->zero_area = vmalloc(len); - if (!ps->zero_area) { - vfree(ps->area); - return r; - } + if (!ps->zero_area) + goto err_zero_area; memset(ps->zero_area, 0, len); + ps->header_area = vmalloc(len); + if (!ps->header_area) + goto err_header_area; + return 0; + +err_header_area: + vfree(ps->zero_area); + +err_zero_area: + vfree(ps->area); + +err_area: + return r; } static void free_area(struct pstore *ps) @@ -169,6 +187,10 @@ static void free_area(struct pstore *ps) if (ps->zero_area) vfree(ps->zero_area); ps->zero_area = NULL; + + if (ps->header_area) + vfree(ps->header_area); + ps->header_area = NULL; } struct mdata_req { @@ -188,7 +210,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) +static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, + int metadata) { struct dm_io_region where = { .bdev = ps->store->cow->bdev, @@ -198,7 +221,7 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) struct dm_io_request io_req = { .bi_rw = rw, .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->area, + .mem.ptr.vma = area, .client = ps->io_client, .notify.fn = NULL, }; @@ -240,7 +263,7 @@ static int area_io(struct pstore *ps, int rw) chunk = area_location(ps, ps->current_area); - r = chunk_io(ps, chunk, rw, 0); + r = chunk_io(ps, ps->area, chunk, rw, 0); if (r) return r; @@ -254,20 +277,7 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { - struct dm_io_region where = { - .bdev = ps->store->cow->bdev, - .sector = ps->store->chunk_size * area_location(ps, area), - .count = ps->store->chunk_size, - }; - struct dm_io_request io_req = { - .bi_rw = WRITE, - .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->zero_area, - .client = ps->io_client, - .notify.fn = NULL, - }; - - return dm_io(&io_req, 1, &where, NULL); + return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -276,6 +286,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) struct disk_header *dh; chunk_t chunk_size; int chunk_size_supplied = 1; + char *chunk_err; /* * Use default chunk size (or hardsect_size, if larger) if none supplied @@ -297,11 +308,11 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, 0, READ, 1); + r = chunk_io(ps, ps->header_area, 0, READ, 1); if (r) goto bad; - dh = (struct disk_header *) ps->area; + dh = ps->header_area; if (le32_to_cpu(dh->magic) == 0) { *new_snapshot = 1; @@ -319,20 +330,25 @@ static int read_header(struct pstore *ps, int *new_snapshot) ps->version = le32_to_cpu(dh->version); chunk_size = le32_to_cpu(dh->chunk_size); - if (!chunk_size_supplied || ps->store->chunk_size == chunk_size) + if (ps->store->chunk_size == chunk_size) return 0; - DMWARN("chunk size %llu in device metadata overrides " - "table chunk size of %llu.", - (unsigned long long)chunk_size, - (unsigned long long)ps->store->chunk_size); + if (chunk_size_supplied) + DMWARN("chunk size %llu in device metadata overrides " + "table chunk size of %llu.", + (unsigned long long)chunk_size, + (unsigned long long)ps->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. */ free_area(ps); - ps->store->chunk_size = chunk_size; - ps->store->chunk_mask = chunk_size - 1; - ps->store->chunk_shift = ffs(chunk_size) - 1; + r = dm_exception_store_set_chunk_size(ps->store, chunk_size, + &chunk_err); + if (r) { + DMERR("invalid on-disk chunk size %llu: %s.", + (unsigned long long)chunk_size, chunk_err); + return r; + } r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), ps->io_client); @@ -351,15 +367,15 @@ static int write_header(struct pstore *ps) { struct disk_header *dh; - memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); + memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT); - dh = (struct disk_header *) ps->area; + dh = ps->header_area; dh->magic = cpu_to_le32(SNAP_MAGIC); dh->valid = cpu_to_le32(ps->valid); dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, 0, WRITE, 1); + return chunk_io(ps, ps->header_area, 0, WRITE, 1); } /* @@ -679,6 +695,8 @@ static int persistent_ctr(struct dm_exception_store *store, ps->valid = 1; ps->version = SNAPSHOT_DISK_VERSION; ps->area = NULL; + ps->zero_area = NULL; + ps->header_area = NULL; ps->next_free = 2; /* skipping the header and first area */ ps->current_committed = 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index d573165cd2b7..57f1bf7f3b7a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1176,6 +1176,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, return 0; } +static int snapshot_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct dm_snapshot *snap = ti->private; + + return fn(ti, snap->origin, 0, ti->len, data); +} + + /*----------------------------------------------------------------- * Origin methods *---------------------------------------------------------------*/ @@ -1410,20 +1419,29 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result, return 0; } +static int origin_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct dm_dev *dev = ti->private; + + return fn(ti, dev, 0, ti->len, data); +} + static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, .map = origin_map, .resume = origin_resume, .status = origin_status, + .iterate_devices = origin_iterate_devices, }; static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -1431,6 +1449,7 @@ static struct target_type snapshot_target = { .end_io = snapshot_end_io, .resume = snapshot_resume, .status = snapshot_status, + .iterate_devices = snapshot_iterate_devices, }; static int __init dm_snapshot_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4e0e5937e42a..3e563d251733 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -329,9 +329,19 @@ static int stripe_iterate_devices(struct dm_target *ti, return ret; } +static void stripe_io_hints(struct dm_target *ti, + struct queue_limits *limits) +{ + struct stripe_c *sc = ti->private; + unsigned chunk_size = (sc->chunk_mask + 1) << 9; + + blk_limits_io_min(limits, chunk_size); + limits->io_opt = chunk_size * sc->stripes; +} + static struct target_type stripe_target = { .name = "striped", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, @@ -339,6 +349,7 @@ static struct target_type stripe_target = { .end_io = stripe_end_io, .status = stripe_status, .iterate_devices = stripe_iterate_devices, + .io_hints = stripe_io_hints, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d952b3441913..1a6cb3c7822e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -343,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) } /* - * If possible, this checks an area of a destination device is valid. + * If possible, this checks an area of a destination device is invalid. */ -static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; @@ -357,36 +357,40 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, char b[BDEVNAME_SIZE]; if (!dev_size) - return 1; + return 0; if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %s too small for target", - dm_device_name(ti->table->md), bdevname(bdev, b)); - return 0; + DMWARN("%s: %s too small for target: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); + return 1; } if (logical_block_size_sectors <= 1) - return 1; + return 0; if (start & (logical_block_size_sectors - 1)) { DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %hu of %s", + "logical block size %u of %s", dm_device_name(ti->table->md), (unsigned long long)start, limits->logical_block_size, bdevname(bdev, b)); - return 0; + return 1; } if (len & (logical_block_size_sectors - 1)) { DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %hu of %s", + "logical block size %u of %s", dm_device_name(ti->table->md), (unsigned long long)len, limits->logical_block_size, bdevname(bdev, b)); - return 0; + return 1; } - return 1; + return 0; } /* @@ -496,8 +500,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } if (blk_stack_limits(limits, &q->limits, start << 9) < 0) - DMWARN("%s: target device %s is misaligned", - dm_device_name(ti->table->md), bdevname(bdev, b)); + DMWARN("%s: target device %s is misaligned: " + "physical_block_size=%u, logical_block_size=%u, " + "alignment_offset=%u, start=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + q->limits.physical_block_size, + q->limits.logical_block_size, + q->limits.alignment_offset, + (unsigned long long) start << 9); + /* * Check if merge fn is supported. @@ -698,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, if (remaining) { DMWARN("%s: table line %u (start sect %llu len %llu) " - "not aligned to h/w logical block size %hu", + "not aligned to h/w logical block size %u", dm_device_name(table->md), i, (unsigned long long) ti->begin, (unsigned long long) ti->len, @@ -996,12 +1007,16 @@ int dm_calculate_queue_limits(struct dm_table *table, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); + /* Set I/O hints portion of queue limits */ + if (ti->type->io_hints) + ti->type->io_hints(ti, &ti_limits); + /* * Check each device area is consistent with the target's * overall queue limits. */ - if (!ti->type->iterate_devices(ti, device_area_is_valid, - &ti_limits)) + if (ti->type->iterate_devices(ti, device_area_is_invalid, + &ti_limits)) return -EINVAL; combine_limits: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a311ea0d441..b4845b14740d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -738,16 +738,22 @@ static void rq_completed(struct mapped_device *md, int run_queue) dm_put(md); } +static void free_rq_clone(struct request *clone) +{ + struct dm_rq_target_io *tio = clone->end_io_data; + + blk_rq_unprep_clone(clone); + free_rq_tio(tio); +} + static void dm_unprep_request(struct request *rq) { struct request *clone = rq->special; - struct dm_rq_target_io *tio = clone->end_io_data; rq->special = NULL; rq->cmd_flags &= ~REQ_DONTPREP; - blk_rq_unprep_clone(clone); - free_rq_tio(tio); + free_rq_clone(clone); } /* @@ -825,8 +831,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - BUG_ON(clone->bio); - free_rq_tio(tio); + free_rq_clone(clone); blk_end_request_all(rq, error); diff --git a/drivers/md/md.c b/drivers/md/md.c index 5b98bea4ff9b..9dd872000cec 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; + mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); @@ -1974,17 +1975,14 @@ repeat: /* otherwise we have to go forward and ... */ mddev->events ++; if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ - /* .. if the array isn't clean, insist on an odd 'events' */ - if ((mddev->events&1)==0) { - mddev->events++; + /* .. if the array isn't clean, an 'even' event must also go + * to spares. */ + if ((mddev->events&1)==0) nospares = 0; - } } else { - /* otherwise insist on an even 'events' (for clean states) */ - if ((mddev->events&1)) { - mddev->events++; + /* otherwise an 'odd' event must go to spares */ + if ((mddev->events&1)) nospares = 0; - } } } @@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) if (max < mddev->resync_min) return -EINVAL; if (max < mddev->resync_max && + mddev->ro == 0 && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; @@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) struct gendisk *disk = mddev->gendisk; mdk_rdev_t *rdev; + mutex_lock(&mddev->open_mutex); if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); - return -EBUSY; - } - - if (mddev->pers) { + err = -EBUSY; + } else if (mddev->pers) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -4366,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) if (mode == 1) set_disk_ro(disk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + err = 0; } - +out: + mutex_unlock(&mddev->open_mutex); + if (err) + return err; /* * Free resources if final stop */ @@ -4433,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) blk_integrity_unregister(disk); md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); -out: return err; } @@ -5518,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) } BUG_ON(mddev != bdev->bd_disk->private_data); - if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) + if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; err = 0; atomic_inc(&mddev->openers); - mddev_unlock(mddev); + mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); out: diff --git a/drivers/md/md.h b/drivers/md/md.h index 78f03168baf9..f8fc188bc762 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -223,6 +223,16 @@ struct mddev_s * so we don't loop trying */ int in_sync; /* know to not need resync */ + /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so + * that we are never stopping an array while it is open. + * 'reconfig_mutex' protects all other reconfiguration. + * These locks are separate due to conflicting interactions + * with bdev->bd_mutex. + * Lock ordering is: + * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk + * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open + */ + struct mutex open_mutex; struct mutex reconfig_mutex; atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2b521ee67dfa..b8a2c5dc67ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_progress < raid5_size(mddev, 0, 0)) { sector_nr = raid5_size(mddev, 0, 0) - conf->reshape_progress; - } else if (mddev->delta_disks > 0 && + } else if (mddev->delta_disks >= 0 && conf->reshape_progress > 0) sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); @@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev) (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ - if (here_new >= here_old) { + if (mddev->delta_disks == 0) { + /* We cannot be sure it is safe to start an in-place + * reshape. It is only safe if user-space if monitoring + * and taking constant backups. + * mdadm always starts a situation like this in + * readonly mode so it can take control before + * allowing any writes. So just check for that. + */ + if ((here_new * mddev->new_chunk_sectors != + here_old * mddev->chunk_sectors) || + mddev->ro == 0) { + printk(KERN_ERR "raid5: in-place reshape must be started" + " in read-only mode - aborting\n"); + return -EINVAL; + } + } else if (mddev->delta_disks < 0 + ? (here_new * mddev->new_chunk_sectors <= + here_old * mddev->chunk_sectors) + : (here_new * mddev->new_chunk_sectors >= + here_old * mddev->chunk_sectors)) { /* Reading from the same stripe as writing to - bad */ printk(KERN_ERR "raid5: reshape_position too early for " "auto-recovery - aborting.\n"); @@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) mddev->degraded--; for (d = conf->raid_disks ; d < conf->raid_disks - mddev->delta_disks; - d++) - raid5_remove_disk(mddev, d); + d++) { + mdk_rdev_t *rdev = conf->disks[d].rdev; + if (rdev && raid5_remove_disk(mddev, d) == 0) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } } mddev->layout = conf->algorithm; mddev->chunk_sectors = conf->chunk_sectors; |