SUNRPC: Eliminate side effects from rpc_malloc

Currently rpc_malloc sets req->rq_buffer internally.  Make this a more
generic interface:  return a pointer to the new buffer (or NULL) and
make the caller set req->rq_buffer and req->rq_bufsize.  This looks much
more like kmalloc and eliminates the side effects.

To fix a potential deadlock, this patch also replaces GFP_NOFS with
GFP_NOWAIT in rpc_malloc.  This prevents async RPCs from sleeping outside
the RPC's task scheduler while allocating their buffer.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6d87320..4a53e94 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -741,50 +741,53 @@
  * @task: RPC task that will use this buffer
  * @size: requested byte size
  *
- * We try to ensure that some NFS reads and writes can always proceed
- * by using a mempool when allocating 'small' buffers.
+ * To prevent rpciod from hanging, this allocator never sleeps,
+ * returning NULL if the request cannot be serviced immediately.
+ * The caller can arrange to sleep in a way that is safe for rpciod.
+ *
+ * Most requests are 'small' (under 2KiB) and can be serviced from a
+ * mempool, ensuring that NFS reads and writes can always proceed,
+ * and that there is good locality of reference for these buffers.
+ *
  * In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
+ * NFS requests, we avoid using GFP_KERNEL.
  */
-void * rpc_malloc(struct rpc_task *task, size_t size)
+void *rpc_malloc(struct rpc_task *task, size_t size)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-	gfp_t	gfp;
+	size_t *buf;
+	gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
 
-	if (task->tk_flags & RPC_TASK_SWAPPER)
-		gfp = GFP_ATOMIC;
+	size += sizeof(size_t);
+	if (size <= RPC_BUFFER_MAXSIZE)
+		buf = mempool_alloc(rpc_buffer_mempool, gfp);
 	else
-		gfp = GFP_NOFS;
-
-	if (size > RPC_BUFFER_MAXSIZE) {
-		req->rq_buffer = kmalloc(size, gfp);
-		if (req->rq_buffer)
-			req->rq_bufsize = size;
-	} else {
-		req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
-		if (req->rq_buffer)
-			req->rq_bufsize = RPC_BUFFER_MAXSIZE;
-	}
-	return req->rq_buffer;
+		buf = kmalloc(size, gfp);
+	*buf = size;
+	dprintk("RPC: %5u allocated buffer of size %u at %p\n",
+			task->tk_pid, size, buf);
+	return (void *) ++buf;
 }
 
 /**
  * rpc_free - free buffer allocated via rpc_malloc
- * @task: RPC task with a buffer to be freed
+ * @buffer: buffer to free
  *
  */
-void rpc_free(struct rpc_task *task)
+void rpc_free(void *buffer)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
+	size_t size, *buf = (size_t *) buffer;
 
-	if (req->rq_buffer) {
-		if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
-			mempool_free(req->rq_buffer, rpc_buffer_mempool);
-		else
-			kfree(req->rq_buffer);
-		req->rq_buffer = NULL;
-		req->rq_bufsize = 0;
-	}
+	if (!buffer)
+		return;
+	size = *buf;
+	buf--;
+
+	dprintk("RPC:       freeing buffer of size %u at %p\n",
+			size, buf);
+	if (size <= RPC_BUFFER_MAXSIZE)
+		mempool_free(buf, rpc_buffer_mempool);
+	else
+		kfree(buf);
 }
 
 /*