Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2:
  ocfs2: add quota call to ocfs2_remove_btree_range()
  ocfs2: Wakeup the downconvert thread after a successful cancel convert
  ocfs2: Access the xattr bucket only before modifying it.
  configfs: Silence lockdep on mkdir(), rmdir() and configfs_depend_item()
  ocfs2: Fix possible deadlock in ocfs2_write_dquot()
  ocfs2: Push out dropping of dentry lock to ocfs2_wq
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-02-03 16:50:20 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-02-03 16:50:20 -0800
commit: 3e1c4005134e3a090c64c1bc35f965043bb451f4 (patch)
tree: aae42aa4706ac44091eb119776183f2c61f0aed0 /fs
parent: b987e8e5a986e77069c99fda5da6878751808fe9 (diff)
parent: fd4ef231962ab44fd1004e87f9d7c6809f00cd64 (diff)
9 files changed, 133 insertions, 14 deletions
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e93341f3e82..9c2358391147 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -553,12 +553,24 @@ static void detach_groups(struct config_group *group)
 
 		child = sd->s_dentry;
 
+		/*
+		 * Note: we hide this from lockdep since we have no way
+		 * to teach lockdep about recursive
+		 * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path
+		 * in an inode tree, which are valid as soon as
+		 * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a
+		 * parent inode to one of its children.
+		 */
+		lockdep_off();
 		mutex_lock(&child->d_inode->i_mutex);
+		lockdep_on();
 
 		configfs_detach_group(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
 
+		lockdep_off();
 		mutex_unlock(&child->d_inode->i_mutex);
+		lockdep_on();
 
 		d_delete(child);
 		dput(child);
@@ -748,11 +760,22 @@ static int configfs_attach_item(struct config_item *parent_item,
 			 * We are going to remove an inode and its dentry but
 			 * the VFS may already have hit and used them. Thus,
 			 * we must lock them as rmdir() would.
+			 *
+			 * Note: we hide this from lockdep since we have no way
+			 * to teach lockdep about recursive
+			 * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path
+			 * in an inode tree, which are valid as soon as
+			 * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a
+			 * parent inode to one of its children.
 			 */
+			lockdep_off();
 			mutex_lock(&dentry->d_inode->i_mutex);
+			lockdep_on();
 			configfs_remove_dir(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			lockdep_off();
 			mutex_unlock(&dentry->d_inode->i_mutex);
+			lockdep_on();
 			d_delete(dentry);
 		}
 	}
@@ -787,14 +810,25 @@ static int configfs_attach_group(struct config_item *parent_item,
 		 *
 		 * We must also lock the inode to remove it safely in case of
 		 * error, as rmdir() would.
+		 *
+		 * Note: we hide this from lockdep since we have no way
+		 * to teach lockdep about recursive
+		 * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path
+		 * in an inode tree, which are valid as soon as
+		 * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a
+		 * parent inode to one of its children.
 		 */
+		lockdep_off();
 		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+		lockdep_on();
 		ret = populate_groups(to_config_group(item));
 		if (ret) {
 			configfs_detach_item(item);
 			dentry->d_inode->i_flags |= S_DEAD;
 		}
+		lockdep_off();
 		mutex_unlock(&dentry->d_inode->i_mutex);
+		lockdep_on();
 		if (ret)
 			d_delete(dentry);
 	}
@@ -956,7 +990,17 @@ static int configfs_depend_prep(struct dentry *origin,
 	BUG_ON(!origin || !sd);
 
 	/* Lock this guy on the way down */
+	/*
+	 * Note: we hide this from lockdep since we have no way
+	 * to teach lockdep about recursive
+	 * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path
+	 * in an inode tree, which are valid as soon as
+	 * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a
+	 * parent inode to one of its children.
+	 */
+	lockdep_off();
 	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
+	lockdep_on();
 	if (sd->s_element == target)  /* Boo-yah */
 		goto out;
 
@@ -970,7 +1014,9 @@ static int configfs_depend_prep(struct dentry *origin,
 	}
 
 	/* We looped all our children and didn't find target */
+	lockdep_off();
 	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
+	lockdep_on();
 	ret = -ENOENT;
 
 out:
@@ -990,11 +1036,16 @@ static void configfs_depend_rollback(struct dentry *origin,
 	struct dentry *dentry = item->ci_dentry;
 
 	while (dentry != origin) {
+		/* See comments in configfs_depend_prep() */
+		lockdep_off();
 		mutex_unlock(&dentry->d_inode->i_mutex);
+		lockdep_on();
 		dentry = dentry->d_parent;
 	}
 
+	lockdep_off();
 	mutex_unlock(&origin->d_inode->i_mutex);
+	lockdep_on();
 }
 
 int configfs_depend_item(struct configfs_subsystem *subsys,
@@ -1329,8 +1380,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 			}
 
 			/* Wait until the racing operation terminates */
+			/*
+			 * Note: we hide this from lockdep since we are locked
+			 * with subclass I_MUTEX_NORMAL from vfs_rmdir() (why
+			 * not I_MUTEX_CHILD?), and I_MUTEX_XATTR or
+			 * I_MUTEX_QUOTA are not relevant for the locked inode.
+			 */
+			lockdep_off();
 			mutex_lock(wait_mutex);
 			mutex_unlock(wait_mutex);
+			lockdep_on();
 		}
 	} while (ret == -EAGAIN);
 
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d81..60fe74035db5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5390,6 +5390,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
+	vfs_dq_free_space_nodirty(inode,
+				  ocfs2_clusters_to_bytes(inode->i_sb, len));
+
 	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
 				  dealloc);
 	if (ret) {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e88..e9d7c2038c0f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
 #include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
+#include "super.h"
 
 
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
 	return ret;
 }
 
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+					       dentry_lock_work);
+	struct ocfs2_dentry_lock *dl;
+	int drop_count = DL_INODE_DROP_COUNT;
+
+	spin_lock(&dentry_list_lock);
+	while (osb->dentry_lock_list && drop_count--) {
+		dl = osb->dentry_lock_list;
+		osb->dentry_lock_list = dl->dl_next;
+		spin_unlock(&dentry_list_lock);
+		iput(dl->dl_inode);
+		kfree(dl);
+		spin_lock(&dentry_list_lock);
+	}
+	if (osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	spin_unlock(&dentry_list_lock);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -318,16 +347,23 @@ out_attach:
 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 				   struct ocfs2_dentry_lock *dl)
 {
-	iput(dl->dl_inode);
 	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
 	ocfs2_lock_res_free(&dl->dl_lockres);
-	kfree(dl);
+
+	/* We leave dropping of inode reference to ocfs2_wq as that can
+	 * possibly lead to inode deletion which gets tricky */
+	spin_lock(&dentry_list_lock);
+	if (!osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	dl->dl_next = osb->dentry_lock_list;
+	osb->dentry_lock_list = dl;
+	spin_unlock(&dentry_list_lock);
 }
 
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl)
 {
-	int unlock = 0;
+	int unlock;
 
 	BUG_ON(dl->dl_count == 0);
 
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d9883..d06e16c06640 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
 extern struct dentry_operations ocfs2_dentry_ops;
 
 struct ocfs2_dentry_lock {
+	/* Use count of dentry lock */
 	unsigned int		dl_count;
-	u64			dl_parent_blkno;
+	union {
+		/* Linked list of dentry locks to release */
+		struct ocfs2_dentry_lock *dl_next;
+		u64			dl_parent_blkno;
+	};
 
 	/*
 	 * The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl);
 
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
 				      int skip_unhashed);
 
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c45..206a2370876a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2860,6 +2860,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
 	case OCFS2_UNLOCK_CANCEL_CONVERT:
 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
 		lockres->l_action = OCFS2_AST_INVALID;
+		/* Downconvert thread may have requeued this lock, we
+		 * need to wake it. */
+		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
 		break;
 	case OCFS2_UNLOCK_DROP_LOCK:
 		lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29edd..077384135f4e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
 struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
 	struct list_head blocked_lock_list;
 	unsigned long blocked_lock_count;
 
+	/* List of dentry locks to release. Anyone can add locks to
+	 * the list, ocfs2_wq processes the list  */
+	struct ocfs2_dentry_lock *dentry_lock_list;
+	struct work_struct dentry_lock_work;
+
 	wait_queue_head_t		osb_mount_event;
 
 	/* Truncate log info */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index f4efa89baee5..1ed0f7c86869 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
 	if (dquot->dq_flags & mask)
 		sync = 1;
 	spin_unlock(&dq_data_lock);
-	if (!sync) {
+	/* This is a slight hack but we can't afford getting global quota
+	 * lock if we already have a transaction started. */
+	if (!sync || journal_current_handle()) {
 		status = ocfs2_write_dquot(dquot);
 		goto out;
 	}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b59..b1cb38fbe807 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1887,6 +1887,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
 	journal->j_state = OCFS2_JOURNAL_FREE;
 
+	INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+	osb->dentry_lock_list = NULL;
+
 	/* get some pseudo constants for clustersize bits */
 	osb->s_clustersize_bits =
 		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac3..915039fffe6e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4729,13 +4729,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
 		(vb.vb_bh->b_data + offset % blocksize);
 
-	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
-						OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/*
 	 * From here on out we have to dirty the bucket.  The generic
 	 * value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4741,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_dirty;
+		goto out;
+	}
+
+	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	xe->xe_value_size = cpu_to_le64(len);
 
-out_dirty:
 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
 
 out:
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-02-03 16:50:20 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-02-03 16:50:20 -0800
commit	3e1c4005134e3a090c64c1bc35f965043bb451f4 (patch)
tree	aae42aa4706ac44091eb119776183f2c61f0aed0 /fs
parent	b987e8e5a986e77069c99fda5da6878751808fe9 (diff)
parent	fd4ef231962ab44fd1004e87f9d7c6809f00cd64 (diff)