/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "common.h"

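/*
 * The bio flags with which a frontend BLKIF_OP_WRITE_BARRIER request is
 * submitted to the underlying block device.
 */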
#define WRITE_BARRIER	(REQ_WRITE | REQ_FLUSH | REQ_FUA)

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats;
static unsigned int debug_lvl;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each buffer_head that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct blkif_st		*blkif;
	u64			id;
	int			nr_pages;
	atomic_t		pendcnt;
	unsigned short		operation;
	int			status;
	struct list_head	free_list;
};

#define BLKBACK_INVALID_HANDLE (~0)

struct xen_blkbk {
	struct pending_req	*pending_reqs;
	/* List of all 'pending_req' available */
	struct list_head	pending_free;
	/* And its spinlock. */
	spinlock_t		pending_free_lock;
	wait_queue_head_t	pending_free_wq;
	/* The list of all pages that are available. */
	struct page		**pending_pages;
	/* And the grant handles that are available. */
	grant_handle_t		*pending_grant_handles;
};

static struct xen_blkbk *blkbk;

/*
 * Little helper macro to figure out the index and virtual address of the
 * pending_pages[..]. For each 'pending_req' we have up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
 * 10 and would index into the pending_pages[..].
 */
static inline int vaddr_pagenr(struct pending_req *req, int seg)
{
	return (req - blkbk->pending_reqs) *
		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]

static inline unsigned long vaddr(struct pending_req *req, int seg)
{
	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
	return (unsigned long)pfn_to_kaddr(pfn);
}

#define pending_handle(_req, _seg) \
	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])


static int do_block_io_op(struct blkif_st *blkif);
static void dispatch_rw_block_io(struct blkif_st *blkif,
				 struct blkif_request *req,
				 struct pending_req *pending_req);
static void make_response(struct blkif_st *blkif, u64 id,
			  unsigned short op, int st);

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
static struct pending_req *alloc_req(void)
{
	struct pending_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	if (!list_empty(&blkbk->pending_free)) {
		req = list_entry(blkbk->pending_free.next, struct pending_req,
				 free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	return req;
}

/*
 * Return the 'pending_req' structure back to the free pool. We also
 * wake up the thread if it was waiting for a free pending_req.
 */
static void free_req(struct pending_req *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	was_empty = list_empty(&blkbk->pending_free);
	list_add(&req->free_list, &blkbk->pending_free);
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	if (was_empty)
		wake_up(&blkbk->pending_free_wq);
}

/*
 * Give back a reference count on the underlying storage.
 * It is OK to call this multiple times as it resets the plug
 * to NULL when it is done on the first call.
 */
static void unplug_queue(struct blkif_st *blkif)
{
	if (blkif->plug == NULL)
		return;
	if (blkif->plug->unplug_fn)
		blkif->plug->unplug_fn(blkif->plug);
	blk_put_queue(blkif->plug);
	blkif->plug = NULL;
}

/*
 * Take a reference count on the underlying storage.
 * It is OK to call this multiple times as we check to make sure
 * not to double reference. We also give back a reference count
 * if it corresponds to another queue.
 */
static void plug_queue(struct blkif_st *blkif, struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q == blkif->plug)
		return;
	unplug_queue(blkif);
	blk_get_queue(q);
	blkif->plug = q;
}

/*
 * Unmap the grant references, and also remove the M2P overrides
 * used in the 'pending_req'.
 */
static void fast_flush_area(struct pending_req *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	int ret;

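	/* Collect an unmap op for every segment that was actually mapped. */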
	for (i = 0; i < req->nr_pages; i++) {
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
				    GNTMAP_host_map, handle);
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
	/* Note, we use invcount, not nr_pages, so we can't index
	 * using vaddr(req, i).
	 */
	for (i = 0; i < invcount; i++) {
		ret = m2p_remove_override(
			virt_to_page(unmap[i].host_addr), false);
		if (ret) {
			printk(KERN_ALERT "Failed to remove M2P override for "
			       "%lx\n", (unsigned long)unmap[i].host_addr);
			continue;
		}
	}
}

/*
 * SCHEDULER FUNCTIONS
 */

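/*
 * Dump the per-interface I/O statistics and reset the counters. Called from
 * the dispatch thread roughly every ten seconds when 'log_stats' is set.
 */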
static void print_stats(struct blkif_st *blkif)
{
	printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}

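/*
 * Main loop of the per-device kernel thread: wait until the frontend has
 * posted requests and a free 'pending_req' is available, drain the ring via
 * do_block_io_op(), and unplug the queue so the I/O actually gets issued.
 */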
int blkif_schedule(void *arg)
{
	struct blkif_st *blkif = arg;
	struct vbd *vbd = &blkif->vbd;

	blkif_get(blkif);

	if (debug_lvl)
		printk(KERN_DEBUG "%s: started\n", current->comm);

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;
		if (unlikely(vbd->size != vbd_size(vbd)))
			vbd_resize(blkif);

		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			blkbk->pending_free_wq,
			!list_empty(&blkbk->pending_free) ||
			kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;
		unplug_queue(blkif);

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);
	if (debug_lvl)
		printk(KERN_DEBUG "%s: exiting\n", current->comm);

	blkif->xenblkd = NULL;
	blkif_put(blkif);

	return 0;
}

/*
 * Completion handler shared by all the bios of a request; called from
 * the bio's end_io callback (end_block_io_op) below.
 */

static void __end_block_io_op(struct pending_req *pending_req, int error)
{
	/* An error fails the entire request. */
	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
	    (error == -EOPNOTSUPP)) {
		DPRINTK("blkback: write barrier op failed, not supported\n");
		blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if (error) {
		DPRINTK("Buffer not up-to-date at end of operation, "
			"error=%d\n", error);
		pending_req->status = BLKIF_RSP_ERROR;
	}

	/* If all of the bio's have completed it is time to unmap
	 * the grant references associated with 'request' and provide
	 * the proper response on the ring.
	 */
	if (atomic_dec_and_test(&pending_req->pendcnt)) {
		fast_flush_area(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio, int error)
{
	__end_block_io_op(bio->bi_private, error);
	bio_put(bio);
}

/*
 * Notification from the guest OS.
 */

static void blkif_notify_work(struct blkif_st *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

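/* Interrupt handler for the event channel: kick the dispatch thread. */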
irqreturn_t blkif_be_int(int irq, void *dev_id)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}

/*
 * Copy the 'struct blkif_request' from the ring buffer (it carries the
 * sectors we want, how many of them, the grant references, etc.) and
 * transmute it to the block API to hand it over to the proper block disk.
 */
static int do_block_io_op(struct blkif_st *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_request req;
	struct pending_req *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while (rc != rp) {

		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
			break;

		if (kthread_should_stop()) {
			more_to_do = 1;
			break;
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

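		/*
		 * Copy the request into our private copy; 32-bit and 64-bit
		 * frontends lay the ring entries out differently, hence the
		 * per-protocol helpers.
		 */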
		switch (blkif->blk_protocol) {
		case BLKIF_PROTOCOL_NATIVE:
			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
			break;
		case BLKIF_PROTOCOL_X86_32:
			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
			break;
		case BLKIF_PROTOCOL_X86_64:
			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
			break;
		default:
			BUG();
		}
		blk_rings->common.req_cons = ++rc; /* before make_response() */

		/* Apply all sanity checks to /private copy/ of request. */
		barrier();

		switch (req.operation) {
		case BLKIF_OP_READ:
			blkif->st_rd_req++;
			dispatch_rw_block_io(blkif, &req, pending_req);
			break;
		case BLKIF_OP_WRITE_BARRIER:
			blkif->st_br_req++;
			/* fall through */
		case BLKIF_OP_WRITE:
			blkif->st_wr_req++;
			dispatch_rw_block_io(blkif, &req, pending_req);
			break;
		default:
			/* A good sign something is wrong: sleep for a while to
			 * avoid excessive CPU consumption by a bad guest. */
			msleep(1);
			DPRINTK("error: unknown block io operation [%d]\n",
				req.operation);
			make_response(blkif, req.id, req.operation,
				      BLKIF_RSP_ERROR);
			free_req(pending_req);
			break;
		}

		/* Yield point for this unbounded loop. */
		cond_resched();
	}

	return more_to_do;
}

/*
 * Transmute the 'struct blkif_request' to a proper 'struct bio'
 * and call 'submit_bio' to pass it to the underlying storage.
 */
static void dispatch_rw_block_io(struct blkif_st *blkif,
				 struct blkif_request *req,
				 struct pending_req *pending_req)
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct phys_req preq;
	struct {
		unsigned long buf; unsigned int nsec;
	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg;
	struct bio *bio = NULL;
	int ret, i;
	int operation;

	switch (req->operation) {
	case BLKIF_OP_READ:
		operation = READ;
		break;
	case BLKIF_OP_WRITE:
		operation = WRITE;
		break;
	case BLKIF_OP_WRITE_BARRIER:
		operation = WRITE_BARRIER;
		break;
	default:
		operation = 0; /* make gcc happy */
		BUG();
	}

	/* Check that the number of segments is sane. */
	nseg = req->nr_segments;
	if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		DPRINTK("Bad number of segments in request (%d)\n", nseg);
		goto fail_response;
	}

	preq.dev           = req->handle;
	preq.sector_number = req->u.rw.sector_number;
	preq.nr_sects      = 0;

	pending_req->blkif     = blkif;
	pending_req->id        = req->id;
	pending_req->operation = req->operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;

	/* Fill out preq.nr_sects with the proper number of sectors, and set
	 * up map[..] with the PFN of the page in our domain and the
	 * corresponding grant reference for each page.
	 */
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		seg[i].nsec = req->u.rw.seg[i].last_sect -
			req->u.rw.seg[i].first_sect + 1;
		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
			goto fail_response;
		preq.nr_sects += seg[i].nsec;

		flags = GNTMAP_host_map;
		if (operation != READ)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->u.rw.seg[i].gref, blkif->domid);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	BUG_ON(ret);

	/* Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req, i) it has the contents of
	 * the page from the other domain.
	 */
	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			DPRINTK("invalid buffer -- could not remap it\n");
			map[i].handle = BLKBACK_INVALID_HANDLE;
			ret |= 1;
		}

		pending_handle(pending_req, i) = map[i].handle;

		if (ret)
			continue;

		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
			blkbk->pending_page(pending_req, i), false);
		if (ret) {
			printk(KERN_ALERT "Failed to install M2P override for"
			       " %lx (ret: %d)\n", (unsigned long)
			       map[i].dev_bus_addr, ret);
			/* We could switch over to GNTTABOP_copy */
			continue;
		}

		seg[i].buf = map[i].dev_bus_addr |
			(req->u.rw.seg[i].first_sect << 9);
	}

	/* If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * fast_flush_area.
	 */
	if (ret)
		goto fail_flush;

	if (vbd_translate(&preq, blkif, operation) != 0) {
		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
			operation == READ ? "read" : "write",
			preq.sector_number,
			preq.sector_number + preq.nr_sects, preq.dev);
		goto fail_flush;
	}

	/* Get a reference count for the disk queue and start sending I/O */
	plug_queue(blkif, preq.bdev);

	/* We set it to one so that the last submit_bio does not have to call
	 * atomic_inc.
	 */
	atomic_set(&pending_req->pendcnt, 1);
	blkif_get(blkif);

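	/*
	 * Turn the segments into bios: keep adding pages to the current bio
	 * until bio_add_page() refuses, then submit it and start a new one.
	 */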
	for (i = 0; i < nseg; i++) {
		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
			DPRINTK("Misaligned I/O request from domain %d",
				blkif->domid);
			goto fail_put_bio;
		}

		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     blkbk->pending_page(pending_req, i),
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {
			if (bio) {
				atomic_inc(&pending_req->pendcnt);
				submit_bio(operation, bio);
			}

			bio = bio_alloc(GFP_KERNEL, nseg-i);
			if (unlikely(bio == NULL))
				goto fail_put_bio;

			bio->bi_bdev    = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io  = end_block_io_op;
			bio->bi_sector  = preq.sector_number;
		}

		preq.sector_number += seg[i].nsec;
	}

	/* This will be hit if the operation was a barrier. */
	if (!bio) {
		BUG_ON(operation != WRITE_BARRIER);
		bio = bio_alloc(GFP_KERNEL, 0);
		if (unlikely(bio == NULL))
			goto fail_put_bio;

		bio->bi_bdev    = preq.bdev;
		bio->bi_private = pending_req;
		bio->bi_end_io  = end_block_io_op;
		bio->bi_sector  = -1;
	}

	submit_bio(operation, bio);

	if (operation == READ)
		blkif->st_rd_sect += preq.nr_sects;
	else if (operation == WRITE || operation == WRITE_BARRIER)
		blkif->st_wr_sect += preq.nr_sects;

	return;

 fail_flush:
	fast_flush_area(pending_req);
 fail_response:
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
	msleep(1); /* back off a bit */
	return;

 fail_put_bio:
	__end_block_io_op(pending_req, -EINVAL);
	if (bio)
		bio_put(bio);
	unplug_queue(blkif);
	msleep(1); /* back off a bit */
	return;
}


/*
 * Put a response on the ring on how the operation fared.
 */
static void make_response(struct blkif_st *blkif, u64 id,
			  unsigned short op, int st)
{
	struct blkif_response resp;
	unsigned long flags;
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	int more_to_do = 0;
	int notify;

	resp.id        = id;
	resp.operation = op;
	resp.status    = st;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_32:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_64:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	default:
		BUG();
	}
	blk_rings->common.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);

	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
		more_to_do = 1;
	}

	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

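/*
 * Module initialization: allocate the pool of pending requests, the pages
 * used for grant mappings and their grant handles, then register the
 * blkif interface and the xenbus backend.
 */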
static int __init blkif_init(void)
{
	int i, mmap_pages;
	int rc = 0;

	if (!xen_pv_domain())
		return -ENODEV;

	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
	if (!blkbk) {
		printk(KERN_ALERT "%s: out of memory!\n", __func__);
		return -ENOMEM;
	}

	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
					blkif_reqs, GFP_KERNEL);
	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
					mmap_pages, GFP_KERNEL);
	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
					mmap_pages, GFP_KERNEL);

	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
	    !blkbk->pending_pages) {
		rc = -ENOMEM;
		goto out_of_memory;
	}

	for (i = 0; i < mmap_pages; i++) {
		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
		if (blkbk->pending_pages[i] == NULL) {
			rc = -ENOMEM;
			goto out_of_memory;
		}
	}
	rc = blkif_interface_init();
	if (rc)
		goto failed_init;

	/* pending_reqs was allocated with kmalloc, so clear the whole array. */
	memset(blkbk->pending_reqs, 0, blkif_reqs *
	       sizeof(blkbk->pending_reqs[0]));

	INIT_LIST_HEAD(&blkbk->pending_free);
	spin_lock_init(&blkbk->pending_free_lock);
	init_waitqueue_head(&blkbk->pending_free_wq);

	for (i = 0; i < blkif_reqs; i++)
		list_add_tail(&blkbk->pending_reqs[i].free_list,
			      &blkbk->pending_free);

	rc = blkif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

 out_of_memory:
	printk(KERN_ERR "%s: out of memory\n", __func__);
 failed_init:
	kfree(blkbk->pending_reqs);
	kfree(blkbk->pending_grant_handles);
	/* pending_pages itself may not have been allocated on this path. */
	if (blkbk->pending_pages) {
		for (i = 0; i < mmap_pages; i++) {
			if (blkbk->pending_pages[i])
				__free_page(blkbk->pending_pages[i]);
		}
	}
	kfree(blkbk->pending_pages);
	kfree(blkbk);
	blkbk = NULL;
	return rc;
}

module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");