/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "dir.h"
#include "config.h"
#include "ast.h"
#include "memory.h"
#include "rcom.h"
#include "lock.h"
#include "lowcomms.h"
#include "member.h"
#include "recover.h"


/*
 * Recovery waiting routines: these functions wait for a particular reply from
 * a remote node, or for the remote node to report a certain status.  They need
 * to abort if the lockspace is stopped indicating a node has failed (perhaps
 * the one being waited for).
 */

/*
 * Wait until the given function returns non-zero or the lockspace is stopped
 * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
 * function thinks it could have completed the waited-on task, it should wake
 * up ls_wait_general to get an immediate response rather than waiting for the
 * timer to detect the result.  A timer wakes us up periodically while waiting
 * to see if we should abort due to a node failure.  This should only be called
 * by the dlm_recoverd thread.
 */

static void dlm_wait_timer_fn(unsigned long data)
{
	struct dlm_ls *ls = (struct dlm_ls *) data;
	mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
	wake_up(&ls->ls_wait_general);
}

int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
	int error = 0;

	init_timer(&ls->ls_timer);
	ls->ls_timer.function = dlm_wait_timer_fn;
	ls->ls_timer.data = (long) ls;
	ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
	add_timer(&ls->ls_timer);

	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
	del_timer_sync(&ls->ls_timer);

	if (dlm_recovery_stopped(ls)) {
		log_debug(ls, "dlm_wait_function aborted");
		error = -EINTR;
	}
	return error;
}
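
/*
 * Typical use, as in dlm_recover_masters() and dlm_recover_locks() below:
 *
 *	error = dlm_wait_function(ls, &recover_list_empty);
 *
 * The self-rearming timer re-tests the wait_event() condition every
 * ci_recover_timer seconds, so a missed wake_up only delays detection
 * of the result instead of hanging recovery.
 */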

/*
 * An efficient way for all nodes to wait for all others to have a certain
 * status.  The node with the lowest nodeid polls all the others for their
 * status (wait_status_all) and all the others poll the node with the low id
 * for its accumulated result (wait_status_low).  When all nodes have set
 * status flag X, then status flag X_ALL will be set on the low nodeid.
 */
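
/*
 * wait_status() below depends on each X_ALL flag being defined as X << 1.
 * Assuming the flag values in dlm_internal.h, for example:
 *
 *	DLM_RS_NODES		0x00000001
 *	DLM_RS_NODES_ALL	0x00000002	(DLM_RS_NODES << 1)
 */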

uint32_t dlm_recover_status(struct dlm_ls *ls)
{
	uint32_t status;
	spin_lock(&ls->ls_recover_lock);
	status = ls->ls_recover_status;
	spin_unlock(&ls->ls_recover_lock);
	return status;
}

void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	spin_lock(&ls->ls_recover_lock);
	ls->ls_recover_status |= status;
	spin_unlock(&ls->ls_recover_lock);
}

static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
{
	struct dlm_rcom *rc = ls->ls_recover_buf;
	struct dlm_member *memb;
	int error = 0, delay;

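	/* Poll each member with linear backoff (20 ms steps, capped at 1 s).
	   dlm_rcom_status() is assumed to leave the remote node's reply in
	   ls_recover_buf, so rc->rc_result is the status word of the node
	   just polled. */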
	list_for_each_entry(memb, &ls->ls_nodes, list) {
		delay = 0;
		for (;;) {
			if (dlm_recovery_stopped(ls)) {
				error = -EINTR;
				goto out;
			}

			error = dlm_rcom_status(ls, memb->nodeid);
			if (error)
				goto out;

			if (rc->rc_result & wait_status)
				break;
			if (delay < 1000)
				delay += 20;
			msleep(delay);
		}
	}
 out:
	return error;
}

static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
{
	struct dlm_rcom *rc = ls->ls_recover_buf;
	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;

	for (;;) {
		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			goto out;
		}

		error = dlm_rcom_status(ls, nodeid);
		if (error)
			break;

		if (rc->rc_result & wait_status)
			break;
		if (delay < 1000)
			delay += 20;
		msleep(delay);
	}
 out:
	return error;
}

static int wait_status(struct dlm_ls *ls, uint32_t status)
{
	uint32_t status_all = status << 1;
	int error;

	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
		error = wait_status_all(ls, status);
		if (!error)
			dlm_set_recover_status(ls, status_all);
	} else
		error = wait_status_low(ls, status_all);

	return error;
}
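
/*
 * Barrier wrappers for the recovery phases: each returns once every node
 * has set the given status bit, observed directly by the low nodeid and
 * via the corresponding _ALL bit by everyone else.
 */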

int dlm_recover_members_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_NODES);
}

int dlm_recover_directory_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DIR);
}

int dlm_recover_locks_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_LOCKS);
}

int dlm_recover_done_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DONE);
}

/*
 * The recover_list contains all the rsb's for which we've requested the new
 * master nodeid.  As replies are returned from the resource directories the
 * rsb's are removed from the list.  When the list is empty we're done.
 *
 * The recover_list is later similarly used for all rsb's for which we've sent
 * new lkb's and need to receive new corresponding lkid's.
 *
 * We use the address of the rsb struct as a simple local identifier for the
 * rsb so we can match an rcom reply with the rsb it was sent for.
 */

static int recover_list_empty(struct dlm_ls *ls)
{
	int empty;

	spin_lock(&ls->ls_recover_list_lock);
	empty = list_empty(&ls->ls_recover_list);
	spin_unlock(&ls->ls_recover_list_lock);

	return empty;
}

static void recover_list_add(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	if (list_empty(&r->res_recover_list)) {
		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
		ls->ls_recover_list_count++;
		dlm_hold_rsb(r);
	}
	spin_unlock(&ls->ls_recover_list_lock);
}

static void recover_list_del(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	list_del_init(&r->res_recover_list);
	ls->ls_recover_list_count--;
	spin_unlock(&ls->ls_recover_list_lock);

	dlm_put_rsb(r);
}
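
/*
 * Match a reply to the rsb it was sent for: rc_id is assumed to carry the
 * rsb address that was used as the identifier when the request was sent
 * (see the comment above recover_list_empty()).
 */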

static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
{
	struct dlm_rsb *r = NULL;

	spin_lock(&ls->ls_recover_list_lock);

	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
		if (id == (unsigned long) r)
			goto out;
	}
	r = NULL;
 out:
	spin_unlock(&ls->ls_recover_list_lock);
	return r;
}

static void recover_list_clear(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *s;

	spin_lock(&ls->ls_recover_list_lock);
	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
		list_del_init(&r->res_recover_list);
		r->res_recover_locks_count = 0;
		dlm_put_rsb(r);
		ls->ls_recover_list_count--;
	}

	if (ls->ls_recover_list_count != 0) {
		log_error(ls, "warning: recover_list_count %d",
			  ls->ls_recover_list_count);
		ls->ls_recover_list_count = 0;
	}
	spin_unlock(&ls->ls_recover_list_lock);
}


/* Master recovery: find new master node for rsb's that were
   mastered on nodes that have been removed.

   dlm_recover_masters
   recover_master
   dlm_send_rcom_lookup            ->  receive_rcom_lookup
                                       dlm_dir_lookup
   receive_rcom_lookup_reply       <-
   dlm_recover_master_reply
   set_new_master
   set_master_lkbs
   set_lock_master
*/

/*
 * Set the lock master for all LKBs in a lock queue.
 * If we are the new master of the rsb, we may have received new
 * MSTCPY locks from other nodes already which we need to ignore
 * when setting the new nodeid.
 */

static void set_lock_master(struct list_head *queue, int nodeid)
{
	struct dlm_lkb *lkb;

	list_for_each_entry(lkb, queue, lkb_statequeue)
		if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
			lkb->lkb_nodeid = nodeid;
}

static void set_master_lkbs(struct dlm_rsb *r)
{
	set_lock_master(&r->res_grantqueue, r->res_nodeid);
	set_lock_master(&r->res_convertqueue, r->res_nodeid);
	set_lock_master(&r->res_waitqueue, r->res_nodeid);
}

/*
 * Propagate the new master nodeid to locks.
 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
 * rsb's to consider.
 */

static void set_new_master(struct dlm_rsb *r, int nodeid)
{
	lock_rsb(r);
	r->res_nodeid = nodeid;
	set_master_lkbs(r);
	rsb_set_flag(r, RSB_NEW_MASTER);
	rsb_set_flag(r, RSB_NEW_MASTER2);
	unlock_rsb(r);
}

/*
 * We do async lookups on rsb's that need new masters.  The rsb's
 * waiting for a lookup reply are kept on the recover_list.
 */
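
/* A res_nodeid of 0 means the rsb is mastered locally, which is why a
   directory answer naming this node is translated to 0 below. */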

static int recover_master(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;
	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();

	dir_nodeid = dlm_dir_nodeid(r);

	if (dir_nodeid == our_nodeid) {
		error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
				       r->res_length, &ret_nodeid);
		if (error)
			log_error(ls, "recover dir lookup error %d", error);

		if (ret_nodeid == our_nodeid)
			ret_nodeid = 0;
		set_new_master(r, ret_nodeid);
	} else {
		recover_list_add(r);
		error = dlm_send_rcom_lookup(r, dir_nodeid);
	}

	return error;
}

/*
 * When not using a directory, most resource names will hash to a new static
 * master nodeid and the resource will need to be remastered.
 */

static int recover_master_static(struct dlm_rsb *r)
{
	int master = dlm_dir_nodeid(r);

	if (master == dlm_our_nodeid())
		master = 0;

	if (r->res_nodeid != master) {
		if (is_master(r))
			dlm_purge_mstcpy_locks(r);
		set_new_master(r, master);
		return 1;
	}
	return 0;
}
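
/* The 1/0 return above lets dlm_recover_masters() count remastered rsb's
   the same way it counts directory lookups. */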

/*
 * Go through local root resources and for each rsb whose master has
 * departed, get the new master nodeid from the directory.  The dir will
 * assign mastery to the first node to look up the new master.  That means
 * we'll discover in this lookup if we're the new master of any rsb's.
 *
 * We fire off all the dir lookup requests individually and asynchronously to
 * the correct dir node.
 */

int dlm_recover_masters(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error = 0, count = 0;

	log_debug(ls, "dlm_recover_masters");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		if (dlm_recovery_stopped(ls)) {
			up_read(&ls->ls_root_sem);
			error = -EINTR;
			goto out;
		}

		if (dlm_no_directory(ls))
			count += recover_master_static(r);
		else if (!is_master(r) &&
			 (dlm_is_removed(ls, r->res_nodeid) ||
			  rsb_flag(r, RSB_NEW_MASTER))) {
			recover_master(r);
			count++;
		}

		schedule();
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_masters %d resources", count);

	error = dlm_wait_function(ls, &recover_list_empty);
 out:
	if (error)
		recover_list_clear(ls);
	return error;
}

int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
	struct dlm_rsb *r;
	int nodeid;

	r = recover_list_find(ls, rc->rc_id);
	if (!r) {
		log_error(ls, "dlm_recover_master_reply no id %llx",
			  (unsigned long long)rc->rc_id);
		goto out;
	}

	nodeid = rc->rc_result;
	if (nodeid == dlm_our_nodeid())
		nodeid = 0;

	set_new_master(r, nodeid);
	recover_list_del(r);

	if (recover_list_empty(ls))
		wake_up(&ls->ls_wait_general);
 out:
	return 0;
}


/* Lock recovery: rebuild the process-copy locks we hold on a
   remastered rsb on the new rsb master.

   dlm_recover_locks
   recover_locks
   recover_locks_queue
   dlm_send_rcom_lock              ->  receive_rcom_lock
                                       dlm_recover_master_copy
   receive_rcom_lock_reply         <-
   dlm_recover_process_copy
*/


/*
 * keep a count of the number of lkb's we send to the new master; when we get
 * an equal number of replies then recovery for the rsb is done
 */

static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
{
	struct dlm_lkb *lkb;
	int error = 0;

	list_for_each_entry(lkb, head, lkb_statequeue) {
		error = dlm_send_rcom_lock(r, lkb);
		if (error)
			break;
		r->res_recover_locks_count++;
	}

	return error;
}

static int recover_locks(struct dlm_rsb *r)
{
	int error = 0;

	lock_rsb(r);

	DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););

	error = recover_locks_queue(r, &r->res_grantqueue);
	if (error)
		goto out;
	error = recover_locks_queue(r, &r->res_convertqueue);
	if (error)
		goto out;
	error = recover_locks_queue(r, &r->res_waitqueue);
	if (error)
		goto out;

	if (r->res_recover_locks_count)
		recover_list_add(r);
	else
		rsb_clear_flag(r, RSB_NEW_MASTER);
 out:
	unlock_rsb(r);
	return error;
}

int dlm_recover_locks(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error, count = 0;

	log_debug(ls, "dlm_recover_locks");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		if (is_master(r)) {
			rsb_clear_flag(r, RSB_NEW_MASTER);
			continue;
		}

		if (!rsb_flag(r, RSB_NEW_MASTER))
			continue;

		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			up_read(&ls->ls_root_sem);
			goto out;
		}

		error = recover_locks(r);
		if (error) {
			up_read(&ls->ls_root_sem);
			goto out;
		}

		count += r->res_recover_locks_count;
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_locks %d locks", count);

	error = dlm_wait_function(ls, &recover_list_empty);
 out:
	if (error)
		recover_list_clear(ls);
	return error;
}

void dlm_recovered_lock(struct dlm_rsb *r)
{
	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););

	r->res_recover_locks_count--;
	if (!r->res_recover_locks_count) {
		rsb_clear_flag(r, RSB_NEW_MASTER);
		recover_list_del(r);
	}

	if (recover_list_empty(r->res_ls))
		wake_up(&r->res_ls->ls_wait_general);
}

/*
 * The lvb needs to be recovered on all master rsb's.  This includes setting
 * the VALNOTVALID flag if necessary, and determining the correct lvb contents
 * based on the lvb's of the locks held on the rsb.
 *
 * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
 * was already set prior to recovery, it's not cleared, regardless of locks.
 *
 * The LVB contents are only considered for changing when this is a new master
 * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
 * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
 * from the lkb with the largest lvb sequence number.
 */
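
/*
 * Worked example: with granted locks PW (lvbseq 5) and CR (lvbseq 7), the
 * PW lkb supplies the lvb since mode > CR wins; with only NL/CR locks, the
 * CR lkb with lvbseq 7 would supply it and VALNOTVALID would be set.
 */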

static void recover_lvb(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb, *high_lkb = NULL;
	uint32_t high_seq = 0;
	int lock_lvb_exists = 0;
	int big_lock_exists = 0;
	int lvblen = r->res_ls->ls_lvblen;

	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

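		/* the signed difference keeps this sequence comparison
		   correct across uint32_t wraparound */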
		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

 setflag:
	if (!lock_lvb_exists)
		goto out;

	if (!big_lock_exists)
		rsb_set_flag(r, RSB_VALNOTVALID);

	/* don't mess with the lvb unless we're the new master */
	if (!rsb_flag(r, RSB_NEW_MASTER2))
		goto out;

	if (!r->res_lvbptr) {
		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
		if (!r->res_lvbptr)
			goto out;
	}

	if (big_lock_exists) {
		r->res_lvbseq = lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
	} else if (high_lkb) {
		r->res_lvbseq = high_lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
	} else {
		r->res_lvbseq = 0;
		memset(r->res_lvbptr, 0, lvblen);
	}
 out:
	return;
}

/* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
   converting PR->CW or CW->PR need to have their lkb_grmode set. */
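
/* In the DLM mode matrix PR and CW are incompatible with each other while
   each is compatible with itself, which is presumably why a PR<->CW
   conversion is the ambiguous case whose granted mode is rebuilt here. */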

static void recover_conversion(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb;
	int grmode = -1;

	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (lkb->lkb_grmode == DLM_LOCK_PR ||
		    lkb->lkb_grmode == DLM_LOCK_CW) {
			grmode = lkb->lkb_grmode;
			break;
		}
	}

	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (lkb->lkb_grmode != DLM_LOCK_IV)
			continue;
		if (grmode == -1)
			lkb->lkb_grmode = lkb->lkb_rqmode;
		else
			lkb->lkb_grmode = grmode;
	}
}

/* We've become the new master for this rsb and waiting/converting locks may
   need to be granted in dlm_grant_after_purge() due to locks that may have
   existed from a removed node. */

static void set_locks_purged(struct dlm_rsb *r)
{
	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
		rsb_set_flag(r, RSB_LOCKS_PURGED);
}

void dlm_recover_rsbs(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int count = 0;

	log_debug(ls, "dlm_recover_rsbs");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		lock_rsb(r);
		if (is_master(r)) {
			if (rsb_flag(r, RSB_RECOVER_CONVERT))
				recover_conversion(r);
			if (rsb_flag(r, RSB_NEW_MASTER2))
				set_locks_purged(r);
			recover_lvb(r);
			count++;
		}
		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
		rsb_clear_flag(r, RSB_NEW_MASTER2);
		unlock_rsb(r);
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
}

/* Create a single list of all root rsb's to be used during recovery */

int dlm_create_root_list(struct dlm_ls *ls)
{
	struct rb_node *n;
	struct dlm_rsb *r;
	int i, error = 0;

	down_write(&ls->ls_root_sem);
	if (!list_empty(&ls->ls_root_list)) {
		log_error(ls, "root list not empty");
		error = -EINVAL;
		goto out;
	}

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		spin_lock(&ls->ls_rsbtbl[i].lock);
		for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
			r = rb_entry(n, struct dlm_rsb, res_hashnode);
			list_add(&r->res_root_list, &ls->ls_root_list);
			dlm_hold_rsb(r);
		}

		/* If we're using a directory, add tossed rsbs to the root
		   list; they'll have entries created in the new directory,
		   but no other recovery steps should do anything with them. */

		if (dlm_no_directory(ls)) {
			spin_unlock(&ls->ls_rsbtbl[i].lock);
			continue;
		}

		for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) {
			r = rb_entry(n, struct dlm_rsb, res_hashnode);
			list_add(&r->res_root_list, &ls->ls_root_list);
			dlm_hold_rsb(r);
		}
		spin_unlock(&ls->ls_rsbtbl[i].lock);
	}
 out:
	up_write(&ls->ls_root_sem);
	return error;
}
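
/* Each rsb on the root list holds a reference (dlm_hold_rsb) that is not
   dropped until dlm_release_root_list(), so the rsb's stay valid for the
   whole of recovery. */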

void dlm_release_root_list(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *safe;

	down_write(&ls->ls_root_sem);
	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
		list_del_init(&r->res_root_list);
		dlm_put_rsb(r);
	}
	up_write(&ls->ls_root_sem);
}

/* If not using a directory, clear the entire toss list; there's no benefit
   to caching the master value since it's fixed.  If we are using a dir, keep
   the rsb's we're the master of.  Recovery will add them to the root list and
   from there they'll be entered in the rebuilt directory. */

void dlm_clear_toss_list(struct dlm_ls *ls)
{
	struct rb_node *n, *next;
	struct dlm_rsb *rsb;
	int i;

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		spin_lock(&ls->ls_rsbtbl[i].lock);
		for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) {
			/* save the successor before the node can be freed */
			next = rb_next(n);
			rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
			if (dlm_no_directory(ls) || !is_master(rsb)) {
				rb_erase(n, &ls->ls_rsbtbl[i].toss);
				dlm_free_rsb(rsb);
			}
		}
		spin_unlock(&ls->ls_rsbtbl[i].lock);
	}
}