diff options
author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-06-07 00:45:22 -0500 |
---|---|---|
committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 09:59:05 -0600 |
commit | e94987db2ed983aea4e45d22db9e17c6bbf2a623 (patch) | |
tree | d216701edf17d2f1b6781be78da780a8c9cf6c18 | |
parent | 11dd35daaab86d12270d23a10e8d242846a8830a (diff) | |
download | blackbird-op-linux-e94987db2ed983aea4e45d22db9e17c6bbf2a623.tar.gz blackbird-op-linux-e94987db2ed983aea4e45d22db9e17c6bbf2a623.zip |
Initiate recovery on node failure
The DLM informs us in case of node failure with the DLM slot number.
cluster_info->recovery_map sets the bit corresponding to the slot number
and wakes up the recovery thread.
The recovery thread:
1. Derives the slot number from the recovery_map
2. Locks the bitmap corresponding to the slot
3. Copies the set bits to the node-local bitmap
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-rw-r--r-- | drivers/md/md-cluster.c | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index b59c3a0ebd08..1f82d0d731ae 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -13,6 +13,7 @@ #include <linux/dlm.h> #include <linux/sched.h> #include "md.h" +#include "bitmap.h" #include "md-cluster.h" #define LVB_SIZE 64 @@ -49,6 +50,8 @@ struct md_cluster_info { struct dlm_lock_resource *bitmap_lockres; struct list_head suspend_list; spinlock_t suspend_lock; + struct md_thread *recovery_thread; + unsigned long recovery_map; }; static void sync_ast(void *arg) @@ -184,6 +187,50 @@ out: return s; } +void recover_bitmaps(struct md_thread *thread) +{ + struct mddev *mddev = thread->mddev; + struct md_cluster_info *cinfo = mddev->cluster_info; + struct dlm_lock_resource *bm_lockres; + char str[64]; + int slot, ret; + struct suspend_info *s, *tmp; + sector_t lo, hi; + + while (cinfo->recovery_map) { + slot = fls64((u64)cinfo->recovery_map) - 1; + + /* Clear suspend_area associated with the bitmap */ + spin_lock_irq(&cinfo->suspend_lock); + list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) + if (slot == s->slot) { + list_del(&s->list); + kfree(s); + } + spin_unlock_irq(&cinfo->suspend_lock); + + snprintf(str, 64, "bitmap%04d", slot); + bm_lockres = lockres_init(mddev, str, NULL, 1); + if (!bm_lockres) { + pr_err("md-cluster: Cannot initialize bitmaps\n"); + goto clear_bit; + } + + ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); + if (ret) { + pr_err("md-cluster: Could not DLM lock %s: %d\n", + str, ret); + goto clear_bit; + } + ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); + if (ret) + pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); + dlm_unlock_sync(bm_lockres); +clear_bit: + clear_bit(slot, &cinfo->recovery_map); + } +} + static void recover_prep(void *arg) { } @@ -197,6 +244,16 @@ static void recover_slot(void *arg, struct dlm_slot *slot) mddev->bitmap_info.cluster_name, slot->nodeid, slot->slot, cinfo->slot_number); + set_bit(slot->slot - 1, &cinfo->recovery_map); + if (!cinfo->recovery_thread) { + cinfo->recovery_thread = md_register_thread(recover_bitmaps, + mddev, "recover"); + if (!cinfo->recovery_thread) { + pr_warn("md-cluster: Could not create recovery thread\n"); + return; + } + } + md_wakeup_thread(cinfo->recovery_thread); } static void recover_done(void *arg, struct dlm_slot *slots, @@ -338,6 +395,7 @@ static int leave(struct mddev *mddev) if (!cinfo) return 0; + md_unregister_thread(&cinfo->recovery_thread); lockres_free(cinfo->sb_lock); lockres_free(cinfo->bitmap_lockres); dlm_release_lockspace(cinfo->lockspace, 2); |