blob: 03e521a9ca7d67bc48052f5edd45e1a9a4222a11 [file] [log] [blame]
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -06001/*
2 * Copyright (C) 2015, SUSE
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 */
10
11
12#include <linux/module.h>
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060013#include <linux/dlm.h>
14#include <linux/sched.h>
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050015#include <linux/raid/md_p.h>
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060016#include "md.h"
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -050017#include "bitmap.h"
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -050018#include "md-cluster.h"
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060019
/* Size in bytes of the lock value block buffer allocated per lock resource. */
#define LVB_SIZE	64
/*
 * Timeout passed to wait_for_completion_timeout() while waiting for the
 * new-disk acknowledgement.
 * NOTE(review): that API counts in jiffies; confirm 5000 was not meant as ms.
 */
#define NEW_DEV_TIMEOUT	5000
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060022
23struct dlm_lock_resource {
24 dlm_lockspace_t *ls;
25 struct dlm_lksb lksb;
26 char *name; /* lock name. */
27 uint32_t flags; /* flags to pass to dlm_lock() */
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060028 struct completion completion; /* completion for synchronized locking */
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -050029 void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
30 struct mddev *mddev; /* pointing back to mddev. */
31};
32
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -050033struct suspend_info {
34 int slot;
35 sector_t lo;
36 sector_t hi;
37 struct list_head list;
38};
39
40struct resync_info {
41 __le64 lo;
42 __le64 hi;
43};
44
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -050045struct md_cluster_info {
46 /* dlm lock space and resources for clustered raid. */
47 dlm_lockspace_t *lockspace;
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -050048 int slot_number;
49 struct completion completion;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -050050 struct dlm_lock_resource *sb_lock;
51 struct mutex sb_mutex;
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -050052 struct dlm_lock_resource *bitmap_lockres;
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -050053 struct list_head suspend_list;
54 spinlock_t suspend_lock;
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -050055 struct md_thread *recovery_thread;
56 unsigned long recovery_map;
Goldwyn Rodrigues46646802014-06-07 01:08:29 -050057 /* communication loc resources */
58 struct dlm_lock_resource *ack_lockres;
59 struct dlm_lock_resource *message_lockres;
60 struct dlm_lock_resource *token_lockres;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050061 struct dlm_lock_resource *no_new_dev_lockres;
Goldwyn Rodrigues46646802014-06-07 01:08:29 -050062 struct md_thread *recv_thread;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050063 struct completion newdisk_completion;
Goldwyn Rodrigues46646802014-06-07 01:08:29 -050064};
65
/* Values carried in cluster_msg.type; dispatched by process_recvd_msg(). */
enum msg_type {
	METADATA_UPDATED = 0,	/* handled by process_metadata_update() */
	RESYNCING,		/* handled by process_suspend_info() */
	NEWDISK,		/* handled by process_add_new_disk() */
};
71
72struct cluster_msg {
73 int type;
74 int slot;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050075 /* TODO: Unionize this for smaller footprint */
Goldwyn Rodrigues46646802014-06-07 01:08:29 -050076 sector_t low;
77 sector_t high;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050078 char uuid[16];
79 int raid_slot;
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060080};
81
82static void sync_ast(void *arg)
83{
84 struct dlm_lock_resource *res;
85
86 res = (struct dlm_lock_resource *) arg;
87 complete(&res->completion);
88}
89
90static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
91{
92 int ret = 0;
93
94 init_completion(&res->completion);
95 ret = dlm_lock(res->ls, mode, &res->lksb,
96 res->flags, res->name, strlen(res->name),
97 0, sync_ast, res, res->bast);
98 if (ret)
99 return ret;
100 wait_for_completion(&res->completion);
101 return res->lksb.sb_status;
102}
103
104static int dlm_unlock_sync(struct dlm_lock_resource *res)
105{
106 return dlm_lock_sync(res, DLM_LOCK_NL);
107}
108
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500109static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -0600110 char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
111{
112 struct dlm_lock_resource *res = NULL;
113 int ret, namelen;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500114 struct md_cluster_info *cinfo = mddev->cluster_info;
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -0600115
116 res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
117 if (!res)
118 return NULL;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500119 res->ls = cinfo->lockspace;
120 res->mddev = mddev;
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -0600121 namelen = strlen(name);
122 res->name = kzalloc(namelen + 1, GFP_KERNEL);
123 if (!res->name) {
124 pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
125 goto out_err;
126 }
127 strlcpy(res->name, name, namelen + 1);
128 if (with_lvb) {
129 res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
130 if (!res->lksb.sb_lvbptr) {
131 pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
132 goto out_err;
133 }
134 res->flags = DLM_LKF_VALBLK;
135 }
136
137 if (bastfn)
138 res->bast = bastfn;
139
140 res->flags |= DLM_LKF_EXPEDITE;
141
142 ret = dlm_lock_sync(res, DLM_LOCK_NL);
143 if (ret) {
144 pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
145 goto out_err;
146 }
147 res->flags &= ~DLM_LKF_EXPEDITE;
148 res->flags |= DLM_LKF_CONVERT;
149
150 return res;
151out_err:
152 kfree(res->lksb.sb_lvbptr);
153 kfree(res->name);
154 kfree(res);
155 return NULL;
156}
157
158static void lockres_free(struct dlm_lock_resource *res)
159{
160 if (!res)
161 return;
162
163 init_completion(&res->completion);
164 dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
165 wait_for_completion(&res->completion);
166
167 kfree(res->name);
168 kfree(res->lksb.sb_lvbptr);
169 kfree(res);
170}
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -0600171
/*
 * Format the 16 bytes at @src as lowercase hex in 8-4-4-4-12 grouping
 * (36 characters plus the terminating NUL) into @dest and return @dest.
 * The caller must provide at least 37 bytes.
 */
static char *pretty_uuid(char *dest, char *src)
{
	char *out = dest;
	int idx;

	for (idx = 0; idx < 16; idx++) {
		switch (idx) {
		case 4:
		case 6:
		case 8:
		case 10:
			out += sprintf(out, "-");
			break;
		default:
			break;
		}
		out += sprintf(out, "%02x", (unsigned char)src[idx]);
	}
	return dest;
}
183
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500184static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
185 sector_t lo, sector_t hi)
186{
187 struct resync_info *ri;
188
189 ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
190 ri->lo = cpu_to_le64(lo);
191 ri->hi = cpu_to_le64(hi);
192}
193
194static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
195{
196 struct resync_info ri;
197 struct suspend_info *s = NULL;
198 sector_t hi = 0;
199
200 dlm_lock_sync(lockres, DLM_LOCK_CR);
201 memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
202 hi = le64_to_cpu(ri.hi);
203 if (ri.hi > 0) {
204 s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
205 if (!s)
206 goto out;
207 s->hi = hi;
208 s->lo = le64_to_cpu(ri.lo);
209 }
210 dlm_unlock_sync(lockres);
211out:
212 return s;
213}
214
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500215void recover_bitmaps(struct md_thread *thread)
216{
217 struct mddev *mddev = thread->mddev;
218 struct md_cluster_info *cinfo = mddev->cluster_info;
219 struct dlm_lock_resource *bm_lockres;
220 char str[64];
221 int slot, ret;
222 struct suspend_info *s, *tmp;
223 sector_t lo, hi;
224
225 while (cinfo->recovery_map) {
226 slot = fls64((u64)cinfo->recovery_map) - 1;
227
228 /* Clear suspend_area associated with the bitmap */
229 spin_lock_irq(&cinfo->suspend_lock);
230 list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
231 if (slot == s->slot) {
232 list_del(&s->list);
233 kfree(s);
234 }
235 spin_unlock_irq(&cinfo->suspend_lock);
236
237 snprintf(str, 64, "bitmap%04d", slot);
238 bm_lockres = lockres_init(mddev, str, NULL, 1);
239 if (!bm_lockres) {
240 pr_err("md-cluster: Cannot initialize bitmaps\n");
241 goto clear_bit;
242 }
243
244 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
245 if (ret) {
246 pr_err("md-cluster: Could not DLM lock %s: %d\n",
247 str, ret);
248 goto clear_bit;
249 }
250 ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
Goldwyn Rodrigues4b26a082014-06-07 00:52:29 -0500251 if (ret) {
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500252 pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
Goldwyn Rodrigues4b26a082014-06-07 00:52:29 -0500253 goto dlm_unlock;
254 }
255 if (hi > 0) {
256 /* TODO:Wait for current resync to get over */
257 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
258 if (lo < mddev->recovery_cp)
259 mddev->recovery_cp = lo;
260 md_check_recovery(mddev);
261 }
262dlm_unlock:
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500263 dlm_unlock_sync(bm_lockres);
264clear_bit:
265 clear_bit(slot, &cinfo->recovery_map);
266 }
267}
268
/* dlm_lockspace_ops.recover_prep hook: nothing to do here. */
static void recover_prep(void *arg)
{
	/* intentionally empty */
}
272
273static void recover_slot(void *arg, struct dlm_slot *slot)
274{
275 struct mddev *mddev = arg;
276 struct md_cluster_info *cinfo = mddev->cluster_info;
277
278 pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
279 mddev->bitmap_info.cluster_name,
280 slot->nodeid, slot->slot,
281 cinfo->slot_number);
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500282 set_bit(slot->slot - 1, &cinfo->recovery_map);
283 if (!cinfo->recovery_thread) {
284 cinfo->recovery_thread = md_register_thread(recover_bitmaps,
285 mddev, "recover");
286 if (!cinfo->recovery_thread) {
287 pr_warn("md-cluster: Could not create recovery thread\n");
288 return;
289 }
290 }
291 md_wakeup_thread(cinfo->recovery_thread);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500292}
293
294static void recover_done(void *arg, struct dlm_slot *slots,
295 int num_slots, int our_slot,
296 uint32_t generation)
297{
298 struct mddev *mddev = arg;
299 struct md_cluster_info *cinfo = mddev->cluster_info;
300
301 cinfo->slot_number = our_slot;
302 complete(&cinfo->completion);
303}
304
305static const struct dlm_lockspace_ops md_ls_ops = {
306 .recover_prep = recover_prep,
307 .recover_slot = recover_slot,
308 .recover_done = recover_done,
309};
310
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500311/*
312 * The BAST function for the ack lock resource
313 * This function wakes up the receive thread in
314 * order to receive and process the message.
315 */
316static void ack_bast(void *arg, int mode)
317{
318 struct dlm_lock_resource *res = (struct dlm_lock_resource *)arg;
319 struct md_cluster_info *cinfo = res->mddev->cluster_info;
320
321 if (mode == DLM_LOCK_EX)
322 md_wakeup_thread(cinfo->recv_thread);
323}
324
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500325static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
326{
327 struct suspend_info *s, *tmp;
328
329 list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
330 if (slot == s->slot) {
331 pr_info("%s:%d Deleting suspend_info: %d\n",
332 __func__, __LINE__, slot);
333 list_del(&s->list);
334 kfree(s);
335 break;
336 }
337}
338
339static void remove_suspend_info(struct md_cluster_info *cinfo, int slot)
340{
341 spin_lock_irq(&cinfo->suspend_lock);
342 __remove_suspend_info(cinfo, slot);
343 spin_unlock_irq(&cinfo->suspend_lock);
344}
345
346
347static void process_suspend_info(struct md_cluster_info *cinfo,
348 int slot, sector_t lo, sector_t hi)
349{
350 struct suspend_info *s;
351
352 if (!hi) {
353 remove_suspend_info(cinfo, slot);
354 return;
355 }
356 s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
357 if (!s)
358 return;
359 s->slot = slot;
360 s->lo = lo;
361 s->hi = hi;
362 spin_lock_irq(&cinfo->suspend_lock);
363 /* Remove existing entry (if exists) before adding */
364 __remove_suspend_info(cinfo, slot);
365 list_add(&s->list, &cinfo->suspend_list);
366 spin_unlock_irq(&cinfo->suspend_lock);
367}
368
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500369static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
370{
371 char disk_uuid[64];
372 struct md_cluster_info *cinfo = mddev->cluster_info;
373 char event_name[] = "EVENT=ADD_DEVICE";
374 char raid_slot[16];
375 char *envp[] = {event_name, disk_uuid, raid_slot, NULL};
376 int len;
377
378 len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
379 pretty_uuid(disk_uuid + len, cmsg->uuid);
380 snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
381 pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
382 init_completion(&cinfo->newdisk_completion);
383 kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp);
384 wait_for_completion_timeout(&cinfo->newdisk_completion,
385 NEW_DEV_TIMEOUT);
386}
387
388
389static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
390{
391 struct md_cluster_info *cinfo = mddev->cluster_info;
392
393 md_reload_sb(mddev);
394 dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
395}
396
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500397static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
398{
399 switch (msg->type) {
400 case METADATA_UPDATED:
401 pr_info("%s: %d Received message: METADATA_UPDATE from %d\n",
402 __func__, __LINE__, msg->slot);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500403 process_metadata_update(mddev, msg);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500404 break;
405 case RESYNCING:
406 pr_info("%s: %d Received message: RESYNCING from %d\n",
407 __func__, __LINE__, msg->slot);
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500408 process_suspend_info(mddev->cluster_info, msg->slot,
409 msg->low, msg->high);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500410 break;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500411 case NEWDISK:
412 pr_info("%s: %d Received message: NEWDISK from %d\n",
413 __func__, __LINE__, msg->slot);
414 process_add_new_disk(mddev, msg);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500415 };
416}
417
418/*
419 * thread for receiving message
420 */
421static void recv_daemon(struct md_thread *thread)
422{
423 struct md_cluster_info *cinfo = thread->mddev->cluster_info;
424 struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
425 struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
426 struct cluster_msg msg;
427
428 /*get CR on Message*/
429 if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
430 pr_err("md/raid1:failed to get CR on MESSAGE\n");
431 return;
432 }
433
434 /* read lvb and wake up thread to process this message_lockres */
435 memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
436 process_recvd_msg(thread->mddev, &msg);
437
438 /*release CR on ack_lockres*/
439 dlm_unlock_sync(ack_lockres);
440 /*up-convert to EX on message_lockres*/
441 dlm_lock_sync(message_lockres, DLM_LOCK_EX);
442 /*get CR on ack_lockres again*/
443 dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
444 /*release CR on message_lockres*/
445 dlm_unlock_sync(message_lockres);
446}
447
Goldwyn Rodrigues601b5152014-06-07 01:28:53 -0500448/* lock_comm()
449 * Takes the lock on the TOKEN lock resource so no other
450 * node can communicate while the operation is underway.
451 */
452static int lock_comm(struct md_cluster_info *cinfo)
453{
454 int error;
455
456 error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
457 if (error)
458 pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
459 __func__, __LINE__, error);
460 return error;
461}
462
463static void unlock_comm(struct md_cluster_info *cinfo)
464{
465 dlm_unlock_sync(cinfo->token_lockres);
466}
467
468/* __sendmsg()
469 * This function performs the actual sending of the message. This function is
470 * usually called after performing the encompassing operation
471 * The function:
472 * 1. Grabs the message lockresource in EX mode
473 * 2. Copies the message to the message LVB
474 * 3. Downconverts message lockresource to CR
475 * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
476 * and the other nodes read the message. The thread will wait here until all other
477 * nodes have released ack lock resource.
478 * 5. Downconvert ack lockresource to CR
479 */
480static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
481{
482 int error;
483 int slot = cinfo->slot_number - 1;
484
485 cmsg->slot = cpu_to_le32(slot);
486 /*get EX on Message*/
487 error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
488 if (error) {
489 pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
490 goto failed_message;
491 }
492
493 memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
494 sizeof(struct cluster_msg));
495 /*down-convert EX to CR on Message*/
496 error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
497 if (error) {
498 pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
499 error);
500 goto failed_message;
501 }
502
503 /*up-convert CR to EX on Ack*/
504 error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_EX);
505 if (error) {
506 pr_err("md-cluster: failed to convert CR to EX on ACK(%d)\n",
507 error);
508 goto failed_ack;
509 }
510
511 /*down-convert EX to CR on Ack*/
512 error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR);
513 if (error) {
514 pr_err("md-cluster: failed to convert EX to CR on ACK(%d)\n",
515 error);
516 goto failed_ack;
517 }
518
519failed_ack:
520 dlm_unlock_sync(cinfo->message_lockres);
521failed_message:
522 return error;
523}
524
/*
 * Send @cmsg while holding the TOKEN lock.
 *
 * Returns 0 on success, or the error from lock_comm() / __sendmsg().  If the
 * token cannot be taken nothing is sent, so we never write the message LVB
 * without owning the token.
 */
static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
{
	int ret;

	ret = lock_comm(cinfo);
	if (ret)
		return ret;

	ret = __sendmsg(cinfo, cmsg);
	unlock_comm(cinfo);
	return ret;
}
534
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500535static int gather_all_resync_info(struct mddev *mddev, int total_slots)
536{
537 struct md_cluster_info *cinfo = mddev->cluster_info;
538 int i, ret = 0;
539 struct dlm_lock_resource *bm_lockres;
540 struct suspend_info *s;
541 char str[64];
542
543
544 for (i = 0; i < total_slots; i++) {
545 memset(str, '\0', 64);
546 snprintf(str, 64, "bitmap%04d", i);
547 bm_lockres = lockres_init(mddev, str, NULL, 1);
548 if (!bm_lockres)
549 return -ENOMEM;
550 if (i == (cinfo->slot_number - 1))
551 continue;
552
553 bm_lockres->flags |= DLM_LKF_NOQUEUE;
554 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
555 if (ret == -EAGAIN) {
556 memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
557 s = read_resync_info(mddev, bm_lockres);
558 if (s) {
559 pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
560 __func__, __LINE__,
561 (unsigned long long) s->lo,
562 (unsigned long long) s->hi, i);
563 spin_lock_irq(&cinfo->suspend_lock);
564 s->slot = i;
565 list_add(&s->list, &cinfo->suspend_list);
566 spin_unlock_irq(&cinfo->suspend_lock);
567 }
568 ret = 0;
569 lockres_free(bm_lockres);
570 continue;
571 }
572 if (ret)
573 goto out;
574 /* TODO: Read the disk bitmap sb and check if it needs recovery */
575 dlm_unlock_sync(bm_lockres);
576 lockres_free(bm_lockres);
577 }
578out:
579 return ret;
580}
581
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500582static int join(struct mddev *mddev, int nodes)
583{
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500584 struct md_cluster_info *cinfo;
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500585 int ret, ops_rv;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500586 char str[64];
587
588 if (!try_module_get(THIS_MODULE))
589 return -ENOENT;
590
591 cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
592 if (!cinfo)
593 return -ENOMEM;
594
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500595 init_completion(&cinfo->completion);
596
597 mutex_init(&cinfo->sb_mutex);
598 mddev->cluster_info = cinfo;
599
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500600 memset(str, 0, 64);
601 pretty_uuid(str, mddev->uuid);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500602 ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
603 DLM_LSFL_FS, LVB_SIZE,
604 &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500605 if (ret)
606 goto err;
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500607 wait_for_completion(&cinfo->completion);
Goldwyn Rodriguesb97e92572014-06-06 11:50:56 -0500608 if (nodes <= cinfo->slot_number) {
609 pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo->slot_number - 1,
610 nodes);
611 ret = -ERANGE;
612 goto err;
613 }
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500614 cinfo->sb_lock = lockres_init(mddev, "cmd-super",
615 NULL, 0);
616 if (!cinfo->sb_lock) {
617 ret = -ENOMEM;
618 goto err;
619 }
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500620 /* Initiate the communication resources */
621 ret = -ENOMEM;
622 cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
623 if (!cinfo->recv_thread) {
624 pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
625 goto err;
626 }
627 cinfo->message_lockres = lockres_init(mddev, "message", NULL, 1);
628 if (!cinfo->message_lockres)
629 goto err;
630 cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
631 if (!cinfo->token_lockres)
632 goto err;
633 cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
634 if (!cinfo->ack_lockres)
635 goto err;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500636 cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
637 if (!cinfo->no_new_dev_lockres)
638 goto err;
639
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500640 /* get sync CR lock on ACK. */
641 if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
642 pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
643 ret);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500644 /* get sync CR lock on no-new-dev. */
645 if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
646 pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
647
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -0500648
649 pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
650 snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
651 cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
652 if (!cinfo->bitmap_lockres)
653 goto err;
654 if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
655 pr_err("Failed to get bitmap lock\n");
656 ret = -EINVAL;
657 goto err;
658 }
659
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500660 INIT_LIST_HEAD(&cinfo->suspend_list);
661 spin_lock_init(&cinfo->suspend_lock);
662
663 ret = gather_all_resync_info(mddev, nodes);
664 if (ret)
665 goto err;
666
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500667 return 0;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500668err:
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500669 lockres_free(cinfo->message_lockres);
670 lockres_free(cinfo->token_lockres);
671 lockres_free(cinfo->ack_lockres);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500672 lockres_free(cinfo->no_new_dev_lockres);
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500673 lockres_free(cinfo->bitmap_lockres);
674 lockres_free(cinfo->sb_lock);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500675 if (cinfo->lockspace)
676 dlm_release_lockspace(cinfo->lockspace, 2);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500677 mddev->cluster_info = NULL;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500678 kfree(cinfo);
679 module_put(THIS_MODULE);
680 return ret;
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500681}
682
683static int leave(struct mddev *mddev)
684{
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500685 struct md_cluster_info *cinfo = mddev->cluster_info;
686
687 if (!cinfo)
688 return 0;
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500689 md_unregister_thread(&cinfo->recovery_thread);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500690 md_unregister_thread(&cinfo->recv_thread);
691 lockres_free(cinfo->message_lockres);
692 lockres_free(cinfo->token_lockres);
693 lockres_free(cinfo->ack_lockres);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500694 lockres_free(cinfo->no_new_dev_lockres);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500695 lockres_free(cinfo->sb_lock);
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -0500696 lockres_free(cinfo->bitmap_lockres);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500697 dlm_release_lockspace(cinfo->lockspace, 2);
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500698 return 0;
699}
700
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500701/* slot_number(): Returns the MD slot number to use
702 * DLM starts the slot numbers from 1, wheras cluster-md
703 * wants the number to be from zero, so we deduct one
704 */
705static int slot_number(struct mddev *mddev)
706{
707 struct md_cluster_info *cinfo = mddev->cluster_info;
708
709 return cinfo->slot_number - 1;
710}
711
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500712static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
713{
714 struct md_cluster_info *cinfo = mddev->cluster_info;
715
716 add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
717 /* Re-acquire the lock to refresh LVB */
718 dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
719}
720
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -0500721static int metadata_update_start(struct mddev *mddev)
722{
723 return lock_comm(mddev->cluster_info);
724}
725
726static int metadata_update_finish(struct mddev *mddev)
727{
728 struct md_cluster_info *cinfo = mddev->cluster_info;
729 struct cluster_msg cmsg;
730 int ret;
731
732 memset(&cmsg, 0, sizeof(cmsg));
733 cmsg.type = cpu_to_le32(METADATA_UPDATED);
734 ret = __sendmsg(cinfo, &cmsg);
735 unlock_comm(cinfo);
736 return ret;
737}
738
739static int metadata_update_cancel(struct mddev *mddev)
740{
741 struct md_cluster_info *cinfo = mddev->cluster_info;
742
743 return dlm_unlock_sync(cinfo->token_lockres);
744}
745
Goldwyn Rodrigues965400e2014-06-07 02:16:58 -0500746static int resync_send(struct mddev *mddev, enum msg_type type,
747 sector_t lo, sector_t hi)
748{
749 struct md_cluster_info *cinfo = mddev->cluster_info;
750 struct cluster_msg cmsg;
751 int slot = cinfo->slot_number - 1;
752
753 pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
754 (unsigned long long)lo,
755 (unsigned long long)hi);
756 resync_info_update(mddev, lo, hi);
757 cmsg.type = cpu_to_le32(type);
758 cmsg.slot = cpu_to_le32(slot);
759 cmsg.low = cpu_to_le64(lo);
760 cmsg.high = cpu_to_le64(hi);
761 return sendmsg(cinfo, &cmsg);
762}
763
764static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
765{
766 pr_info("%s:%d\n", __func__, __LINE__);
767 return resync_send(mddev, RESYNCING, lo, hi);
768}
769
770static void resync_finish(struct mddev *mddev)
771{
772 pr_info("%s:%d\n", __func__, __LINE__);
773 resync_send(mddev, RESYNCING, 0, 0);
774}
775
Goldwyn Rodrigues589a1c42014-06-07 02:39:37 -0500776static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi)
777{
778 struct md_cluster_info *cinfo = mddev->cluster_info;
779 int ret = 0;
780 struct suspend_info *s;
781
782 spin_lock_irq(&cinfo->suspend_lock);
783 if (list_empty(&cinfo->suspend_list))
784 goto out;
785 list_for_each_entry(s, &cinfo->suspend_list, list)
786 if (hi > s->lo && lo < s->hi) {
787 ret = 1;
788 break;
789 }
790out:
791 spin_unlock_irq(&cinfo->suspend_lock);
792 return ret;
793}
794
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500795static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
796{
797 struct md_cluster_info *cinfo = mddev->cluster_info;
798 struct cluster_msg cmsg;
799 int ret = 0;
800 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
801 char *uuid = sb->device_uuid;
802
803 memset(&cmsg, 0, sizeof(cmsg));
804 cmsg.type = cpu_to_le32(NEWDISK);
805 memcpy(cmsg.uuid, uuid, 16);
806 cmsg.raid_slot = rdev->desc_nr;
807 lock_comm(cinfo);
808 ret = __sendmsg(cinfo, &cmsg);
809 if (ret)
810 return ret;
811 cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
812 ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
813 cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
814 /* Some node does not "see" the device */
815 if (ret == -EAGAIN)
816 ret = -ENOENT;
817 else
818 dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
819 return ret;
820}
821
822static int add_new_disk_finish(struct mddev *mddev)
823{
824 struct cluster_msg cmsg;
825 struct md_cluster_info *cinfo = mddev->cluster_info;
826 int ret;
827 /* Write sb and inform others */
828 md_update_sb(mddev, 1);
829 cmsg.type = METADATA_UPDATED;
830 ret = __sendmsg(cinfo, &cmsg);
831 unlock_comm(cinfo);
832 return ret;
833}
834
835static void new_disk_ack(struct mddev *mddev, bool ack)
836{
837 struct md_cluster_info *cinfo = mddev->cluster_info;
838
839 if (ack)
840 dlm_unlock_sync(cinfo->no_new_dev_lockres);
841 complete(&cinfo->newdisk_completion);
842}
843
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500844static struct md_cluster_operations cluster_ops = {
845 .join = join,
846 .leave = leave,
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500847 .slot_number = slot_number,
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500848 .resync_info_update = resync_info_update,
Goldwyn Rodrigues965400e2014-06-07 02:16:58 -0500849 .resync_start = resync_start,
850 .resync_finish = resync_finish,
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -0500851 .metadata_update_start = metadata_update_start,
852 .metadata_update_finish = metadata_update_finish,
853 .metadata_update_cancel = metadata_update_cancel,
Goldwyn Rodrigues589a1c42014-06-07 02:39:37 -0500854 .area_resyncing = area_resyncing,
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500855 .add_new_disk_start = add_new_disk_start,
856 .add_new_disk_finish = add_new_disk_finish,
857 .new_disk_ack = new_disk_ack,
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500858};
859
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -0600860static int __init cluster_init(void)
861{
862 pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
863 pr_info("Registering Cluster MD functions\n");
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500864 register_md_cluster_operations(&cluster_ops, THIS_MODULE);
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -0600865 return 0;
866}
867
/* Module exit point: withdraw the operations registered in cluster_init(). */
static void cluster_exit(void)
{
	unregister_md_cluster_operations();
	return;
}
872
873module_init(cluster_init);
874module_exit(cluster_exit);
875MODULE_LICENSE("GPL");
876MODULE_DESCRIPTION("Clustering support for MD");