Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd

* 'for-2.6.31' of git://fieldses.org/git/linux-nfsd: (60 commits) SUNRPC: Fix the TCP server's send buffer accounting nfsd41: Backchannel: minorversion support for the back channel nfsd41: Backchannel: cleanup nfs4.0 callback encode routines nfsd41: Remove ip address collision detection case nfsd: optimise the starting of zero threads when none are running. nfsd: don't take nfsd_mutex twice when setting number of threads. nfsd41: sanity check client drc maxreqs nfsd41: move channel attributes from nfsd4_session to a nfsd4_channel_attr struct NFS: kill off complicated macro 'PROC' sunrpc: potential memory leak in function rdma_read_xdr nfsd: minor nfsd_vfs_write cleanup nfsd: Pull write-gathering code out of nfsd_vfs_write nfsd: track last inode only in use_wgather case sunrpc: align cache_clean work's timer nfsd: Use write gathering only with NFSv2 NFSv4: kill off complicated macro 'PROC' NFSv4: do exact check about attribute specified knfsd: remove unreported filehandle stats counters knfsd: fix reply cache memory corruption knfsd: reply cache cleanups ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-06-22 12:55:50 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-06-22 12:55:50 -0700
commit: 7e0338c0de18c50f09aea1fbef45110cf7d64a3c (patch)
tree: 30a935c1f6eee7125a9fbb802a33292b1f7268fa /fs/nfsd/vfs.c
parent: df36b439c5fedefe013d4449cb6a50d15e2f4d70 (diff)
parent: 47fcb03fefee2501e79176932a4184fc24d6f8ec (diff)
1 files changed, 50 insertions, 43 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99f83575359..4145083dcf8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry)
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates C. Juszak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	static ino_t last_ino;
+	static dev_t last_dev;
+	int err = 0;
+
+	if (atomic_read(&inode->i_writecount) > 1
+	    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+		msleep(10);
+		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+	}
+
+	if (inode->i_state & I_DIRTY) {
+		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+		err = nfsd_sync(file);
+	}
+	last_ino = inode->i_ino;
+	last_dev = inode->i_sb->s_dev;
+	return err;
+}
+
 static __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 				loff_t offset, struct kvec *vec, int vlen,
@@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	__be32			err = 0;
 	int			host_err;
 	int			stable = *stablep;
+	int			use_wgather;
 
 #ifdef MSNFS
 	err = nfserr_perm;
@@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	 *  -	the sync export option has been set, or
 	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
 	 *  -   The file system doesn't support fsync().
-	 * When gathered writes have been configured for this volume,
+	 * When NFSv2 gathered writes have been configured for this volume,
 	 * flushing the data to disk is handled separately below.
 	 */
+	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
 	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
 	       stable = 2;
@@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	if (!EX_ISSYNC(exp))
 		stable = 0;
-	if (stable && !EX_WGATHER(exp)) {
+	if (stable && !use_wgather) {
 		spin_lock(&file->f_lock);
 		file->f_flags |= O_SYNC;
 		spin_unlock(&file->f_lock);
@@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
 	set_fs(oldfs);
-	if (host_err >= 0) {
-		*cnt = host_err;
-		nfsdstats.io_write += host_err;
-		fsnotify_modify(file->f_path.dentry);
-	}
+	if (host_err < 0)
+		goto out_nfserr;
+	*cnt = host_err;
+	nfsdstats.io_write += host_err;
+	fsnotify_modify(file->f_path.dentry);
 
 	/* clear setuid/setgid flag after write */
-	if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+	if (inode->i_mode & (S_ISUID | S_ISGID))
 		kill_suid(dentry);
 
-	if (host_err >= 0 && stable) {
-		static ino_t	last_ino;
-		static dev_t	last_dev;
-
-		/*
-		 * Gathered writes: If another process is currently
-		 * writing to the file, there's a high chance
-		 * this is another nfsd (triggered by a bulk write
-		 * from a client's biod). Rather than syncing the
-		 * file with each write request, we sleep for 10 msec.
-		 *
-		 * I don't know if this roughly approximates
-		 * C. Juszak's idea of gathered writes, but it's a
-		 * nice and simple solution (IMHO), and it seems to
-		 * work:-)
-		 */
-		if (EX_WGATHER(exp)) {
-			if (atomic_read(&inode->i_writecount) > 1
-			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-				dprintk("nfsd: write defer %d\n", task_pid_nr(current));
-				msleep(10);
-				dprintk("nfsd: write resume %d\n", task_pid_nr(current));
-			}
-
-			if (inode->i_state & I_DIRTY) {
-				dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-				host_err=nfsd_sync(file);
-			}
-#if 0
-			wake_up(&inode->i_wait);
-#endif
-		}
-		last_ino = inode->i_ino;
-		last_dev = inode->i_sb->s_dev;
-	}
+	if (stable && use_wgather)
+		host_err = wait_for_concurrent_writes(file);
 
+out_nfserr:
 	dprintk("nfsd: write complete host_err=%d\n", host_err);
 	if (host_err >= 0)
 		err = 0;
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-22 12:55:50 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-22 12:55:50 -0700
commit	7e0338c0de18c50f09aea1fbef45110cf7d64a3c (patch)
tree	30a935c1f6eee7125a9fbb802a33292b1f7268fa /fs/nfsd/vfs.c
parent	df36b439c5fedefe013d4449cb6a50d15e2f4d70 (diff)
parent	47fcb03fefee2501e79176932a4184fc24d6f8ec (diff)