summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_actlog.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_actlog.c')
-rw-r--r--drivers/block/drbd/drbd_actlog.c315
1 files changed, 141 insertions, 174 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9c42edf4871b..278c31f24639 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -667,36 +667,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
return 0;
}
+static const char *drbd_change_sync_fname[] = {
+ [RECORD_RS_FAILED] = "drbd_rs_failed_io",
+ [SET_IN_SYNC] = "drbd_set_in_sync",
+ [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
+};
+
/* ATTENTION. The AL's extents are 4MB each, while the extents in the
* resync LRU-cache are 16MB each.
* The caller of this function has to hold an get_ldev() reference.
*
+ * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
+ * potentially pulling in (and recounting the corresponding bits)
+ * this resync extent into the resync extent lru cache.
+ *
+ * Returns whether all bits have been cleared for this resync extent,
+ * precisely: (rs_left <= rs_failed)
+ *
* TODO will be obsoleted once we have a caching lru of the on disk bitmap
*/
-static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
- int count, int success)
+static bool update_rs_extent(struct drbd_device *device,
+ unsigned int enr, int count,
+ enum update_sync_bits_mode mode)
{
struct lc_element *e;
- unsigned int enr;
D_ASSERT(device, atomic_read(&device->local_cnt));
- /* I simply assume that a sector/size pair never crosses
- * a 16 MB extent border. (Currently this is true...) */
- enr = BM_SECT_TO_EXT(sector);
-
- e = lc_get(device->resync, enr);
+ /* When setting out-of-sync bits,
+ * we don't need it cached (lc_find).
+ * But if it is present in the cache,
+ * we should update the cached bit count.
+ * Otherwise, that extent should be in the resync extent lru cache
+ * already -- or we want to pull it in if necessary -- (lc_get),
+ * then update and check rs_left and rs_failed. */
+ if (mode == SET_OUT_OF_SYNC)
+ e = lc_find(device->resync, enr);
+ else
+ e = lc_get(device->resync, enr);
if (e) {
struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
if (ext->lce.lc_number == enr) {
- if (success)
+ if (mode == SET_IN_SYNC)
ext->rs_left -= count;
+ else if (mode == SET_OUT_OF_SYNC)
+ ext->rs_left += count;
else
ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) {
- drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
+ drbd_warn(device, "BAD! enr=%u rs_left=%d "
"rs_failed=%d count=%d cstate=%s\n",
- (unsigned long long)sector,
ext->lce.lc_number, ext->rs_left,
ext->rs_failed, count,
drbd_conn_str(device->state.conn));
@@ -730,24 +750,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
ext->lce.lc_number, ext->rs_failed);
}
ext->rs_left = rs_left;
- ext->rs_failed = success ? 0 : count;
+ ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
/* we don't keep a persistent log of the resync lru,
* we can commit any change right away. */
lc_committed(device->resync);
}
- lc_put(device->resync, &ext->lce);
+ if (mode != SET_OUT_OF_SYNC)
+ lc_put(device->resync, &ext->lce);
/* no race, we are within the al_lock! */
- if (ext->rs_left == ext->rs_failed) {
+ if (ext->rs_left <= ext->rs_failed) {
ext->rs_failed = 0;
- wake_up(&first_peer_device(device)->connection->sender_work.q_wait);
+ return true;
}
- } else {
+ } else if (mode != SET_OUT_OF_SYNC) {
+ /* be quiet if lc_find() did not find it. */
drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
device->resync_locked,
device->resync->nr_elements,
device->resync->flags);
}
+ return false;
}
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -766,105 +789,112 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
}
}
-/* clear the bit corresponding to the piece of storage in question:
- * size byte of data starting from sector. Only clear a bits of the affected
- * one ore more _aligned_ BM_BLOCK_SIZE blocks.
- *
- * called by worker on C_SYNC_TARGET and receiver on SyncSource.
- *
- */
-void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+/* It is called lazy update, so don't do write-out too often. */
+static bool lazy_bitmap_update_due(struct drbd_device *device)
{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count = 0;
- sector_t esector, nr_sectors;
- int wake_up = 0;
- unsigned long flags;
+ return time_after(jiffies, device->rs_last_bcast + 2*HZ);
+}
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
+static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
+{
+ struct drbd_connection *connection;
+ if (rs_done)
+ set_bit(RS_DONE, &device->flags);
+ /* and also set RS_PROGRESS below */
+ else if (!lazy_bitmap_update_due(device))
return;
- }
- if (!get_ldev(device))
- return; /* no disk, no metadata, no bitmap to clear bits in */
-
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- goto out;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /* we clear it (in sync).
- * round up start sector, round down end sector. we make sure we only
- * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- goto out;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
-
- if (sbnr > ebnr)
- goto out;
+ /* compare with test_and_clear_bit() calls in and above
+ * try_update_all_on_disk_bitmaps() from the drbd_worker(). */
+ if (test_and_set_bit(RS_PROGRESS, &device->flags))
+ return;
+ connection = first_peer_device(device)->connection;
+ if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags))
+ wake_up(&connection->sender_work.q_wait);
+}
+static int update_sync_bits(struct drbd_device *device,
+ unsigned long sbnr, unsigned long ebnr,
+ enum update_sync_bits_mode mode)
+{
/*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
+ * We keep a count of set bits per resync-extent in the ->rs_left
+ * caching member, so we need to loop and work within the resync extent
+ * alignment. Typically this loop will execute exactly once.
*/
- count = drbd_bm_clear_bits(device, sbnr, ebnr);
- if (count) {
- drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
- spin_lock_irqsave(&device->al_lock, flags);
- drbd_try_clear_on_disk_bm(device, sector, count, true);
- spin_unlock_irqrestore(&device->al_lock, flags);
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
+ unsigned long flags;
+ unsigned long count = 0;
+ unsigned int cleared = 0;
+ while (sbnr <= ebnr) {
+ /* set temporary boundary bit number to last bit number within
+ * the resync extent of the current start bit number,
+ * but cap at provided end bit number */
+ unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
+ unsigned long c;
+
+ if (mode == RECORD_RS_FAILED)
+ /* Only called from drbd_rs_failed_io(), bits
+ * supposedly still set. Recount, maybe some
+ * of the bits have been successfully cleared
+ * by application IO meanwhile.
+ */
+ c = drbd_bm_count_bits(device, sbnr, tbnr);
+ else if (mode == SET_IN_SYNC)
+ c = drbd_bm_clear_bits(device, sbnr, tbnr);
+ else /* if (mode == SET_OUT_OF_SYNC) */
+ c = drbd_bm_set_bits(device, sbnr, tbnr);
+
+ if (c) {
+ spin_lock_irqsave(&device->al_lock, flags);
+ cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
+ spin_unlock_irqrestore(&device->al_lock, flags);
+ count += c;
+ }
+ sbnr = tbnr + 1;
}
-out:
- put_ldev(device);
- if (wake_up)
+ if (count) {
+ if (mode == SET_IN_SYNC) {
+ unsigned long still_to_go = drbd_bm_total_weight(device);
+ bool rs_is_done = (still_to_go <= device->rs_failed);
+ drbd_advance_rs_marks(device, still_to_go);
+ if (cleared || rs_is_done)
+ maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
+ } else if (mode == RECORD_RS_FAILED)
+ device->rs_failed += count;
wake_up(&device->al_wait);
+ }
+ return count;
}
-/*
- * this is intended to set one request worth of data out of sync.
- * affects at least 1 bit,
- * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector. Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
*
- * called by tl_clear and drbd_send_dblock (==drbd_make_request).
- * so this can be _any_ process.
*/
-int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
- const char *file, const unsigned int line)
+int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+ enum update_sync_bits_mode mode,
+ const char *file, const unsigned int line)
{
- unsigned long sbnr, ebnr, flags;
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count = 0;
sector_t esector, nr_sectors;
- unsigned int enr, count = 0;
- struct lc_element *e;
- /* this should be an empty REQ_FLUSH */
- if (size == 0)
+ /* This would be an empty REQ_FLUSH, be silent. */
+ if ((mode == SET_OUT_OF_SYNC) && size == 0)
return 0;
- if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "sector: %llus, size: %d\n",
- (unsigned long long)sector, size);
+ if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+ drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
+ drbd_change_sync_fname[mode],
+ (unsigned long long)sector, size);
return 0;
}
if (!get_ldev(device))
- return 0; /* no disk, no metadata, no bitmap to set bits in */
+ return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
nr_sectors = drbd_get_capacity(device->this_bdev);
esector = sector + (size >> 9) - 1;
@@ -874,25 +904,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
if (!expect(esector < nr_sectors))
esector = nr_sectors - 1;
- /* we set it out of sync,
- * we do not need to round anything here */
- sbnr = BM_SECT_TO_BIT(sector);
- ebnr = BM_SECT_TO_BIT(esector);
-
- /* ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors. */
- spin_lock_irqsave(&device->al_lock, flags);
- count = drbd_bm_set_bits(device, sbnr, ebnr);
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
- enr = BM_SECT_TO_EXT(sector);
- e = lc_find(device->resync, enr);
- if (e)
- lc_entry(e, struct bm_extent, lce)->rs_left += count;
- spin_unlock_irqrestore(&device->al_lock, flags);
+ if (mode == SET_IN_SYNC) {
+ /* Round up start sector, round down end sector. We make sure
+ * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ goto out;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+ } else {
+ /* We set it out of sync, or record resync failure.
+ * Should not round anything here. */
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+ }
+ count = update_sync_bits(device, sbnr, ebnr, mode);
out:
put_ldev(device);
-
return count;
}
@@ -1209,69 +1242,3 @@ int drbd_rs_del_all(struct drbd_device *device)
return 0;
}
-
-/**
- * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
- * @device: DRBD device.
- * @sector: The sector number.
- * @size: Size of failed IO operation, in byte.
- */
-void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
-{
- /* Is called from worker and receiver context _only_ */
- unsigned long sbnr, ebnr, lbnr;
- unsigned long count;
- sector_t esector, nr_sectors;
- int wake_up = 0;
-
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
- drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
- (unsigned long long)sector, size);
- return;
- }
- nr_sectors = drbd_get_capacity(device->this_bdev);
- esector = sector + (size >> 9) - 1;
-
- if (!expect(sector < nr_sectors))
- return;
- if (!expect(esector < nr_sectors))
- esector = nr_sectors - 1;
-
- lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
- /*
- * round up start sector, round down end sector. we make sure we only
- * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
- if (unlikely(esector < BM_SECT_PER_BIT-1))
- return;
- if (unlikely(esector == (nr_sectors-1)))
- ebnr = lbnr;
- else
- ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
- sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
-
- if (sbnr > ebnr)
- return;
-
- /*
- * ok, (capacity & 7) != 0 sometimes, but who cares...
- * we count rs_{total,left} in bits, not sectors.
- */
- spin_lock_irq(&device->al_lock);
- count = drbd_bm_count_bits(device, sbnr, ebnr);
- if (count) {
- device->rs_failed += count;
-
- if (get_ldev(device)) {
- drbd_try_clear_on_disk_bm(device, sector, count, false);
- put_ldev(device);
- }
-
- /* just wake_up unconditional now, various lc_chaged(),
- * lc_put() in drbd_try_clear_on_disk_bm(). */
- wake_up = 1;
- }
- spin_unlock_irq(&device->al_lock);
- if (wake_up)
- wake_up(&device->al_wait);
-}
OpenPOWER on IntegriCloud