blob: 2baac952f15c6608311f3a7319db9b39ad4ea87b [file] [log] [blame]
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/* used while emulated sync operation in progress */
#define NOT_DONE 0x7fffffff
50
/* Per-request flag bits, passed as the 'flags' argument of bdrv_co_do_readv() */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
} BdrvRequestFlags;
54
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020055static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000056static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
57 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000058 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000059static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
60 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000061 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020062static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
63 int64_t sector_num, int nb_sectors,
64 QEMUIOVector *iov);
65static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
66 int64_t sector_num, int nb_sectors,
67 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010068static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000069 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
70 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010071static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010073static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
74 int64_t sector_num,
75 QEMUIOVector *qiov,
76 int nb_sectors,
77 BlockDriverCompletionFunc *cb,
78 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010079 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010080static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000081
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080082static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
83 bool is_write, double elapsed_time, uint64_t *wait);
84static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
85 double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, int64_t *wait);
88
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010089static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
90 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000091
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010092static QLIST_HEAD(, BlockDriver) bdrv_drivers =
93 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000094
Markus Armbrusterf9092b12010-06-25 10:33:39 +020095/* The device to use for VM snapshots */
96static BlockDriverState *bs_snapshots;
97
Markus Armbrustereb852012009-10-27 18:41:44 +010098/* If non-zero, use only whitelisted block drivers */
99static int use_bdrv_whitelist;
100
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000101#ifdef _WIN32
/*
 * Return non-zero if filename begins with an ASCII letter immediately
 * followed by ':' (e.g. "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
108
/*
 * Return 1 if filename is either a bare drive specification (a letter, a
 * colon and nothing else) or starts with one of the two device prefixes
 * tested below; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
119#endif
120
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800121/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800122void bdrv_io_limits_disable(BlockDriverState *bs)
123{
124 bs->io_limits_enabled = false;
125
126 while (qemu_co_queue_next(&bs->throttled_reqs));
127
128 if (bs->block_timer) {
129 qemu_del_timer(bs->block_timer);
130 qemu_free_timer(bs->block_timer);
131 bs->block_timer = NULL;
132 }
133
134 bs->slice_start = 0;
135 bs->slice_end = 0;
136 bs->slice_time = 0;
137 memset(&bs->io_base, 0, sizeof(bs->io_base));
138}
139
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800140static void bdrv_block_timer(void *opaque)
141{
142 BlockDriverState *bs = opaque;
143
144 qemu_co_queue_next(&bs->throttled_reqs);
145}
146
147void bdrv_io_limits_enable(BlockDriverState *bs)
148{
149 qemu_co_queue_init(&bs->throttled_reqs);
150 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
151 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
152 bs->slice_start = qemu_get_clock_ns(vm_clock);
153 bs->slice_end = bs->slice_start + bs->slice_time;
154 memset(&bs->io_base, 0, sizeof(bs->io_base));
155 bs->io_limits_enabled = true;
156}
157
158bool bdrv_io_limits_enabled(BlockDriverState *bs)
159{
160 BlockIOLimit *io_limits = &bs->io_limits;
161 return io_limits->bps[BLOCK_IO_LIMIT_READ]
162 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
163 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
164 || io_limits->iops[BLOCK_IO_LIMIT_READ]
165 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
166 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
167}
168
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800169static void bdrv_io_limits_intercept(BlockDriverState *bs,
170 bool is_write, int nb_sectors)
171{
172 int64_t wait_time = -1;
173
174 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
175 qemu_co_queue_wait(&bs->throttled_reqs);
176 }
177
178 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
179 * throttled requests will not be dequeued until the current request is
180 * allowed to be serviced. So if the current request still exceeds the
181 * limits, it will be inserted to the head. All requests followed it will
182 * be still in throttled_reqs queue.
183 */
184
185 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
186 qemu_mod_timer(bs->block_timer,
187 wait_time + qemu_get_clock_ns(vm_clock));
188 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
189 }
190
191 qemu_co_queue_next(&bs->throttled_reqs);
192}
193
/*
 * Check whether path carries a "<protocol>:" specification.
 *
 * Any path containing a ':' is treated as having a protocol. On Windows,
 * drive specifications recognised by is_windows_drive() or
 * is_windows_drive_prefix() are excluded first.
 *
 * Returns 1 if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
206
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped; the
 * path is absolute when what follows starts with '/' (or, on Windows, with
 * '\\' as well).
 */
int path_is_absolute(const char *path)
{
    const char *start = path;
    const char *colon;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    if (colon != NULL) {
        start = colon + 1;
    }

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
226
/*
 * Build the path of filename into dest (at most dest_size bytes including
 * the terminating NUL).
 *
 * If filename is absolute it is copied unchanged. Otherwise it is taken to
 * be relative to base_path: the directory part of base_path (everything up
 * to and including the last path separator, or the first ':' if that comes
 * later) is kept and filename is appended to it. URLs are supported.
 *
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator of base_path, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix is longer, truncated to fit into dest */
    cut = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
270
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500271void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000272{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100273 /* Block drivers without coroutine functions need emulation */
274 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200275 bdrv->bdrv_co_readv = bdrv_co_readv_em;
276 bdrv->bdrv_co_writev = bdrv_co_writev_em;
277
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100278 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
279 * the block driver lacks aio we need to emulate that too.
280 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200281 if (!bdrv->bdrv_aio_readv) {
282 /* add AIO emulation layer */
283 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
284 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200285 }
bellard83f64092006-08-01 16:21:11 +0000286 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200287
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100288 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000289}
bellardb3380822004-03-14 21:38:54 +0000290
291/* create a new block device (by default it is empty) */
292BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000293{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000295
Anthony Liguori7267c092011-08-20 22:09:37 -0500296 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000297 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000298 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100299 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000300 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300301 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000302 return bs;
303}
304
bellardea2384d2004-08-01 21:59:26 +0000305BlockDriver *bdrv_find_format(const char *format_name)
306{
307 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100308 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
309 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000310 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100311 }
bellardea2384d2004-08-01 21:59:26 +0000312 }
313 return NULL;
314}
315
Markus Armbrustereb852012009-10-27 18:41:44 +0100316static int bdrv_is_whitelisted(BlockDriver *drv)
317{
318 static const char *whitelist[] = {
319 CONFIG_BDRV_WHITELIST
320 };
321 const char **p;
322
323 if (!whitelist[0])
324 return 1; /* no whitelist, anything goes */
325
326 for (p = whitelist; *p; p++) {
327 if (!strcmp(drv->format_name, *p)) {
328 return 1;
329 }
330 }
331 return 0;
332}
333
334BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
335{
336 BlockDriver *drv = bdrv_find_format(format_name);
337 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
338}
339
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340int bdrv_create(BlockDriver *drv, const char* filename,
341 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000342{
343 if (!drv->bdrv_create)
344 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200345
346 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000347}
348
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
350{
351 BlockDriver *drv;
352
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900353 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200354 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000355 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 }
357
358 return bdrv_create(drv, filename, options);
359}
360
#ifdef _WIN32
/*
 * Create a temporary file and store its name in filename.
 *
 * filename: output buffer for the generated name
 * size:     size of filename in bytes; must be at least MAX_PATH
 *
 * Failures of GetTempPath()/GetTempFileName() are not reported.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* GetTempFileName() is not told the length of its output buffer and may
     * write up to MAX_PATH characters, so a smaller buffer would overflow.
     */
    assert(size >= MAX_PATH);

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Create a temporary file and store its name in filename.
 *
 * The file is created as "vl.XXXXXX" (pattern filled in by mkstemp()) in
 * the directory named by $TMPDIR, or in /tmp if TMPDIR is unset.
 *
 * filename: output buffer for the generated name
 * size:     size of filename in bytes
 *
 * A failure of mkstemp() is not reported to the caller.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; only close a descriptor we own */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000383
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200384/*
385 * Detect host devices. By convention, /dev/cdrom[N] is always
386 * recognized as a host CDROM.
387 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200388static BlockDriver *find_hdev_driver(const char *filename)
389{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200390 int score_max = 0, score;
391 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200392
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100393 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200394 if (d->bdrv_probe_device) {
395 score = d->bdrv_probe_device(filename);
396 if (score > score_max) {
397 score_max = score;
398 drv = d;
399 }
400 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200401 }
402
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200403 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200404}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200405
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900406BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200407{
408 BlockDriver *drv1;
409 char protocol[128];
410 int len;
411 const char *p;
412
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200413 /* TODO Drivers without bdrv_file_open must be specified explicitly */
414
Christoph Hellwig39508e72010-06-23 12:25:17 +0200415 /*
416 * XXX(hch): we really should not let host device detection
417 * override an explicit protocol specification, but moving this
418 * later breaks access to device names with colons in them.
419 * Thanks to the brain-dead persistent naming schemes on udev-
420 * based Linux systems those actually are quite common.
421 */
422 drv1 = find_hdev_driver(filename);
423 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200424 return drv1;
425 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200426
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000427 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428 return bdrv_find_format("file");
429 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000430 p = strchr(filename, ':');
431 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200432 len = p - filename;
433 if (len > sizeof(protocol) - 1)
434 len = sizeof(protocol) - 1;
435 memcpy(protocol, filename, len);
436 protocol[len] = '\0';
437 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
438 if (drv1->protocol_name &&
439 !strcmp(drv1->protocol_name, protocol)) {
440 return drv1;
441 }
442 }
443 return NULL;
444}
445
Stefan Weilc98ac352010-07-21 21:51:51 +0200446static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000447{
bellard83f64092006-08-01 16:21:11 +0000448 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000449 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000450 uint8_t buf[2048];
451 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000452
Naphtali Spreif5edb012010-01-17 16:48:13 +0200453 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200454 if (ret < 0) {
455 *pdrv = NULL;
456 return ret;
457 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700458
Kevin Wolf08a00552010-06-01 18:37:31 +0200459 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
460 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700461 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200462 drv = bdrv_find_format("raw");
463 if (!drv) {
464 ret = -ENOENT;
465 }
466 *pdrv = drv;
467 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700468 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700469
bellard83f64092006-08-01 16:21:11 +0000470 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
471 bdrv_delete(bs);
472 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200473 *pdrv = NULL;
474 return ret;
bellard83f64092006-08-01 16:21:11 +0000475 }
476
bellardea2384d2004-08-01 21:59:26 +0000477 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200478 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100479 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000480 if (drv1->bdrv_probe) {
481 score = drv1->bdrv_probe(buf, ret, filename);
482 if (score > score_max) {
483 score_max = score;
484 drv = drv1;
485 }
bellardea2384d2004-08-01 21:59:26 +0000486 }
487 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200488 if (!drv) {
489 ret = -ENOENT;
490 }
491 *pdrv = drv;
492 return ret;
bellardea2384d2004-08-01 21:59:26 +0000493}
494
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100495/**
496 * Set the current 'total_sectors' value
497 */
498static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
499{
500 BlockDriver *drv = bs->drv;
501
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700502 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
503 if (bs->sg)
504 return 0;
505
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100506 /* query actual device if possible, otherwise just trust the hint */
507 if (drv->bdrv_getlength) {
508 int64_t length = drv->bdrv_getlength(bs);
509 if (length < 0) {
510 return length;
511 }
512 hint = length >> BDRV_SECTOR_BITS;
513 }
514
515 bs->total_sectors = hint;
516 return 0;
517}
518
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100519/**
520 * Set open flags for a given cache mode
521 *
522 * Return 0 on success, -1 if the cache mode was invalid.
523 */
524int bdrv_parse_cache_flags(const char *mode, int *flags)
525{
526 *flags &= ~BDRV_O_CACHE_MASK;
527
528 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
529 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100530 } else if (!strcmp(mode, "directsync")) {
531 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100532 } else if (!strcmp(mode, "writeback")) {
533 *flags |= BDRV_O_CACHE_WB;
534 } else if (!strcmp(mode, "unsafe")) {
535 *flags |= BDRV_O_CACHE_WB;
536 *flags |= BDRV_O_NO_FLUSH;
537 } else if (!strcmp(mode, "writethrough")) {
538 /* this is the default */
539 } else {
540 return -1;
541 }
542
543 return 0;
544}
545
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000546/**
547 * The copy-on-read flag is actually a reference count so multiple users may
548 * use the feature without worrying about clobbering its previous state.
549 * Copy-on-read stays enabled until all users have called to disable it.
550 */
551void bdrv_enable_copy_on_read(BlockDriverState *bs)
552{
553 bs->copy_on_read++;
554}
555
556void bdrv_disable_copy_on_read(BlockDriverState *bs)
557{
558 assert(bs->copy_on_read > 0);
559 bs->copy_on_read--;
560}
561
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200562/*
Kevin Wolf57915332010-04-14 15:24:50 +0200563 * Common part for opening disk images and files
564 */
565static int bdrv_open_common(BlockDriverState *bs, const char *filename,
566 int flags, BlockDriver *drv)
567{
568 int ret, open_flags;
569
570 assert(drv != NULL);
571
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100572 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
573
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200574 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100575 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->encrypted = 0;
577 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100578 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200579 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->buffer_alignment = 512;
582
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000583 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
584 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
585 bdrv_enable_copy_on_read(bs);
586 }
587
Kevin Wolf57915332010-04-14 15:24:50 +0200588 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100589 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200590
591 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
592 return -ENOTSUP;
593 }
594
595 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500596 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200597
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100598 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
600 /*
601 * Clear flags that are internal to the block layer before opening the
602 * image.
603 */
604 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
605
606 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200607 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200608 */
609 if (bs->is_temporary) {
610 open_flags |= BDRV_O_RDWR;
611 }
612
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100613 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
614
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200615 /* Open the image, either directly or using a protocol */
616 if (drv->bdrv_file_open) {
617 ret = drv->bdrv_file_open(bs, filename, open_flags);
618 } else {
619 ret = bdrv_file_open(&bs->file, filename, open_flags);
620 if (ret >= 0) {
621 ret = drv->bdrv_open(bs, open_flags);
622 }
623 }
624
Kevin Wolf57915332010-04-14 15:24:50 +0200625 if (ret < 0) {
626 goto free_and_fail;
627 }
628
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100629 ret = refresh_total_sectors(bs, bs->total_sectors);
630 if (ret < 0) {
631 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200632 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100633
Kevin Wolf57915332010-04-14 15:24:50 +0200634#ifndef _WIN32
635 if (bs->is_temporary) {
636 unlink(filename);
637 }
638#endif
639 return 0;
640
641free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200642 if (bs->file) {
643 bdrv_delete(bs->file);
644 bs->file = NULL;
645 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500646 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200647 bs->opaque = NULL;
648 bs->drv = NULL;
649 return ret;
650}
651
652/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200653 * Opens a file using a protocol (file, host_device, nbd, ...)
654 */
bellard83f64092006-08-01 16:21:11 +0000655int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000656{
bellard83f64092006-08-01 16:21:11 +0000657 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200658 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000659 int ret;
660
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900661 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200662 if (!drv) {
663 return -ENOENT;
664 }
665
bellard83f64092006-08-01 16:21:11 +0000666 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200667 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000668 if (ret < 0) {
669 bdrv_delete(bs);
670 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000671 }
aliguori71d07702009-03-03 17:37:16 +0000672 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000673 *pbs = bs;
674 return 0;
bellardea2384d2004-08-01 21:59:26 +0000675}
bellardfc01f7e2003-06-30 10:03:06 +0000676
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200677/*
678 * Opens a disk image (raw, qcow2, vmdk, ...)
679 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200680int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
681 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000682{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200683 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200684 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000685
bellard83f64092006-08-01 16:21:11 +0000686 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000687 BlockDriverState *bs1;
688 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000689 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200690 BlockDriver *bdrv_qcow2;
691 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200692 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000693
bellardea2384d2004-08-01 21:59:26 +0000694 /* if snapshot, we create a temporary backing file and open it
695 instead of opening 'filename' directly */
696
697 /* if there is a backing file, use it */
698 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200699 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000700 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000701 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000702 return ret;
bellardea2384d2004-08-01 21:59:26 +0000703 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200704 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000705
706 if (bs1->drv && bs1->drv->protocol_name)
707 is_protocol = 1;
708
bellardea2384d2004-08-01 21:59:26 +0000709 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000710
bellardea2384d2004-08-01 21:59:26 +0000711 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000712
713 /* Real path is meaningless for protocols */
714 if (is_protocol)
715 snprintf(backing_filename, sizeof(backing_filename),
716 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000717 else if (!realpath(filename, backing_filename))
718 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000719
Kevin Wolf91a073a2009-05-27 14:48:06 +0200720 bdrv_qcow2 = bdrv_find_format("qcow2");
721 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
722
Jes Sorensen3e829902010-05-27 16:20:30 +0200723 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200724 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
725 if (drv) {
726 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
727 drv->format_name);
728 }
729
730 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200731 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000732 if (ret < 0) {
733 return ret;
bellardea2384d2004-08-01 21:59:26 +0000734 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200735
bellardea2384d2004-08-01 21:59:26 +0000736 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000738 bs->is_temporary = 1;
739 }
bellard712e7872005-04-28 21:09:32 +0000740
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200741 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200742 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200743 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000744 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100745
aliguori51d7c002009-03-05 23:00:29 +0000746 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000747 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000748 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200749
750 /* Open the image */
751 ret = bdrv_open_common(bs, filename, flags, drv);
752 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100753 goto unlink_and_fail;
754 }
755
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200756 /* If there is a backing file, use it */
757 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
758 char backing_filename[PATH_MAX];
759 int back_flags;
760 BlockDriver *back_drv = NULL;
761
762 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000763
764 if (path_has_protocol(bs->backing_file)) {
765 pstrcpy(backing_filename, sizeof(backing_filename),
766 bs->backing_file);
767 } else {
768 path_combine(backing_filename, sizeof(backing_filename),
769 filename, bs->backing_file);
770 }
771
772 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200773 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000774 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775
776 /* backing files always opened read-only */
777 back_flags =
778 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
779
780 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
781 if (ret < 0) {
782 bdrv_close(bs);
783 return ret;
784 }
785 if (bs->is_temporary) {
786 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
787 } else {
788 /* base image inherits from "parent" */
789 bs->backing_hd->keep_read_only = bs->keep_read_only;
790 }
791 }
792
793 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200794 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200795 }
796
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800797 /* throttling disk I/O limits */
798 if (bs->io_limits_enabled) {
799 bdrv_io_limits_enable(bs);
800 }
801
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200802 return 0;
803
804unlink_and_fail:
805 if (bs->is_temporary) {
806 unlink(filename);
807 }
808 return ret;
809}
810
bellardfc01f7e2003-06-30 10:03:06 +0000811void bdrv_close(BlockDriverState *bs)
812{
bellard19cb3732006-08-19 11:45:59 +0000813 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200814 if (bs == bs_snapshots) {
815 bs_snapshots = NULL;
816 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100817 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000818 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100819 bs->backing_hd = NULL;
820 }
bellardea2384d2004-08-01 21:59:26 +0000821 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500822 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000823#ifdef _WIN32
824 if (bs->is_temporary) {
825 unlink(bs->filename);
826 }
bellard67b915a2004-03-31 23:37:16 +0000827#endif
bellardea2384d2004-08-01 21:59:26 +0000828 bs->opaque = NULL;
829 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000830 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000831
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200832 if (bs->file != NULL) {
833 bdrv_close(bs->file);
834 }
835
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200836 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000837 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800838
839 /*throttling disk I/O limits*/
840 if (bs->io_limits_enabled) {
841 bdrv_io_limits_disable(bs);
842 }
bellardb3380822004-03-14 21:38:54 +0000843}
844
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900845void bdrv_close_all(void)
846{
847 BlockDriverState *bs;
848
849 QTAILQ_FOREACH(bs, &bdrv_states, list) {
850 bdrv_close(bs);
851 }
852}
853
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000854/*
855 * Wait for pending requests to complete across all BlockDriverStates
856 *
857 * This function does not flush data to disk, use bdrv_flush_all() for that
858 * after calling this function.
859 */
860void bdrv_drain_all(void)
861{
862 BlockDriverState *bs;
863
864 qemu_aio_flush();
865
866 /* If requests are still pending there is a bug somewhere */
867 QTAILQ_FOREACH(bs, &bdrv_states, list) {
868 assert(QLIST_EMPTY(&bs->tracked_requests));
869 assert(qemu_co_queue_empty(&bs->throttled_reqs));
870 }
871}
872
Ryan Harperd22b2f42011-03-29 20:51:47 -0500873/* make a BlockDriverState anonymous by removing from bdrv_state list.
874 Also, NULL terminate the device_name to prevent double remove */
875void bdrv_make_anon(BlockDriverState *bs)
876{
877 if (bs->device_name[0] != '\0') {
878 QTAILQ_REMOVE(&bdrv_states, bs, list);
879 }
880 bs->device_name[0] = '\0';
881}
882
bellardb3380822004-03-14 21:38:54 +0000883void bdrv_delete(BlockDriverState *bs)
884{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200885 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200886
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100887 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500888 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000889
bellardb3380822004-03-14 21:38:54 +0000890 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200891 if (bs->file != NULL) {
892 bdrv_delete(bs->file);
893 }
894
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200895 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500896 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000897}
898
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200899int bdrv_attach_dev(BlockDriverState *bs, void *dev)
900/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200901{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200902 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200903 return -EBUSY;
904 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200905 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300906 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200907 return 0;
908}
909
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200910/* TODO qdevified devices don't use this, remove when devices are qdevified */
911void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200912{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200913 if (bdrv_attach_dev(bs, dev) < 0) {
914 abort();
915 }
916}
917
918void bdrv_detach_dev(BlockDriverState *bs, void *dev)
919/* TODO change to DeviceState *dev when all users are qdevified */
920{
921 assert(bs->dev == dev);
922 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200923 bs->dev_ops = NULL;
924 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200925 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200926}
927
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200928/* TODO change to return DeviceState * when all users are qdevified */
929void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200930{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200931 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200932}
933
Markus Armbruster0e49de52011-08-03 15:07:41 +0200934void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
935 void *opaque)
936{
937 bs->dev_ops = ops;
938 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200939 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
940 bs_snapshots = NULL;
941 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200942}
943
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200944static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200945{
Markus Armbruster145feb12011-08-03 15:07:42 +0200946 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200947 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200948 }
949}
950
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200951bool bdrv_dev_has_removable_media(BlockDriverState *bs)
952{
953 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
954}
955
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100956void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
957{
958 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
959 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
960 }
961}
962
Markus Armbrustere4def802011-09-06 18:58:53 +0200963bool bdrv_dev_is_tray_open(BlockDriverState *bs)
964{
965 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
966 return bs->dev_ops->is_tray_open(bs->dev_opaque);
967 }
968 return false;
969}
970
Markus Armbruster145feb12011-08-03 15:07:42 +0200971static void bdrv_dev_resize_cb(BlockDriverState *bs)
972{
973 if (bs->dev_ops && bs->dev_ops->resize_cb) {
974 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200975 }
976}
977
Markus Armbrusterf1076392011-09-06 18:58:46 +0200978bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
979{
980 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
981 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
982 }
983 return false;
984}
985
aliguorie97fc192009-04-21 23:11:50 +0000986/*
987 * Run consistency checks on an image
988 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200989 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200990 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200991 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000992 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200993int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000994{
995 if (bs->drv->bdrv_check == NULL) {
996 return -ENOTSUP;
997 }
998
Kevin Wolfe076f332010-06-29 11:43:13 +0200999 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001000 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001001}
1002
Kevin Wolf8a426612010-07-16 17:17:01 +02001003#define COMMIT_BUF_SECTORS 2048
1004
bellard33e39632003-07-06 17:15:21 +00001005/* commit COW file into the raw image */
1006int bdrv_commit(BlockDriverState *bs)
1007{
bellard19cb3732006-08-19 11:45:59 +00001008 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001009 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001010 int64_t sector, total_sectors;
1011 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001012 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001013 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001014 char filename[1024];
1015 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001016
bellard19cb3732006-08-19 11:45:59 +00001017 if (!drv)
1018 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001019
1020 if (!bs->backing_hd) {
1021 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001022 }
1023
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001024 if (bs->backing_hd->keep_read_only) {
1025 return -EACCES;
1026 }
Kevin Wolfee181192010-08-05 13:05:22 +02001027
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001028 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1029 return -EBUSY;
1030 }
1031
Kevin Wolfee181192010-08-05 13:05:22 +02001032 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001033 ro = bs->backing_hd->read_only;
1034 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1035 open_flags = bs->backing_hd->open_flags;
1036
1037 if (ro) {
1038 /* re-open as RW */
1039 bdrv_delete(bs->backing_hd);
1040 bs->backing_hd = NULL;
1041 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001042 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1043 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001044 if (rw_ret < 0) {
1045 bdrv_delete(bs_rw);
1046 /* try to re-open read-only */
1047 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001048 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1049 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001050 if (ret < 0) {
1051 bdrv_delete(bs_ro);
1052 /* drive not functional anymore */
1053 bs->drv = NULL;
1054 return ret;
1055 }
1056 bs->backing_hd = bs_ro;
1057 return rw_ret;
1058 }
1059 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001060 }
bellardea2384d2004-08-01 21:59:26 +00001061
Jan Kiszka6ea44302009-11-30 18:21:19 +01001062 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001063 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001064
Kevin Wolf8a426612010-07-16 17:17:01 +02001065 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001066 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001067
1068 if (bdrv_read(bs, sector, buf, n) != 0) {
1069 ret = -EIO;
1070 goto ro_cleanup;
1071 }
1072
1073 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1074 ret = -EIO;
1075 goto ro_cleanup;
1076 }
bellardea2384d2004-08-01 21:59:26 +00001077 }
1078 }
bellard95389c82005-12-18 18:28:15 +00001079
Christoph Hellwig1d449522010-01-17 12:32:30 +01001080 if (drv->bdrv_make_empty) {
1081 ret = drv->bdrv_make_empty(bs);
1082 bdrv_flush(bs);
1083 }
bellard95389c82005-12-18 18:28:15 +00001084
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001085 /*
1086 * Make sure all data we wrote to the backing device is actually
1087 * stable on disk.
1088 */
1089 if (bs->backing_hd)
1090 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001091
1092ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001093 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001094
1095 if (ro) {
1096 /* re-open as RO */
1097 bdrv_delete(bs->backing_hd);
1098 bs->backing_hd = NULL;
1099 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001100 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1101 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001102 if (ret < 0) {
1103 bdrv_delete(bs_ro);
1104 /* drive not functional anymore */
1105 bs->drv = NULL;
1106 return ret;
1107 }
1108 bs->backing_hd = bs_ro;
1109 bs->backing_hd->keep_read_only = 0;
1110 }
1111
Christoph Hellwig1d449522010-01-17 12:32:30 +01001112 return ret;
bellard33e39632003-07-06 17:15:21 +00001113}
1114
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001115void bdrv_commit_all(void)
1116{
1117 BlockDriverState *bs;
1118
1119 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1120 bdrv_commit(bs);
1121 }
1122}
1123
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001124struct BdrvTrackedRequest {
1125 BlockDriverState *bs;
1126 int64_t sector_num;
1127 int nb_sectors;
1128 bool is_write;
1129 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001130 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001131 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001132};
1133
1134/**
1135 * Remove an active request from the tracked requests list
1136 *
1137 * This function should be called when a tracked request is completing.
1138 */
1139static void tracked_request_end(BdrvTrackedRequest *req)
1140{
1141 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001142 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001143}
1144
1145/**
1146 * Add an active request to the tracked requests list
1147 */
1148static void tracked_request_begin(BdrvTrackedRequest *req,
1149 BlockDriverState *bs,
1150 int64_t sector_num,
1151 int nb_sectors, bool is_write)
1152{
1153 *req = (BdrvTrackedRequest){
1154 .bs = bs,
1155 .sector_num = sector_num,
1156 .nb_sectors = nb_sectors,
1157 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001158 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001159 };
1160
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001161 qemu_co_queue_init(&req->wait_queue);
1162
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001163 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1164}
1165
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001166/**
1167 * Round a region to cluster boundaries
1168 */
1169static void round_to_clusters(BlockDriverState *bs,
1170 int64_t sector_num, int nb_sectors,
1171 int64_t *cluster_sector_num,
1172 int *cluster_nb_sectors)
1173{
1174 BlockDriverInfo bdi;
1175
1176 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1177 *cluster_sector_num = sector_num;
1178 *cluster_nb_sectors = nb_sectors;
1179 } else {
1180 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1181 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1182 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1183 nb_sectors, c);
1184 }
1185}
1186
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001187static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1188 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001189 /* aaaa bbbb */
1190 if (sector_num >= req->sector_num + req->nb_sectors) {
1191 return false;
1192 }
1193 /* bbbb aaaa */
1194 if (req->sector_num >= sector_num + nb_sectors) {
1195 return false;
1196 }
1197 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001198}
1199
1200static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1201 int64_t sector_num, int nb_sectors)
1202{
1203 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001204 int64_t cluster_sector_num;
1205 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001206 bool retry;
1207
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001208 /* If we touch the same cluster it counts as an overlap. This guarantees
1209 * that allocating writes will be serialized and not race with each other
1210 * for the same cluster. For example, in copy-on-read it ensures that the
1211 * CoR read and write operations are atomic and guest writes cannot
1212 * interleave between them.
1213 */
1214 round_to_clusters(bs, sector_num, nb_sectors,
1215 &cluster_sector_num, &cluster_nb_sectors);
1216
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001217 do {
1218 retry = false;
1219 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001220 if (tracked_request_overlaps(req, cluster_sector_num,
1221 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001222 /* Hitting this means there was a reentrant request, for
1223 * example, a block driver issuing nested requests. This must
1224 * never happen since it means deadlock.
1225 */
1226 assert(qemu_coroutine_self() != req->co);
1227
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001228 qemu_co_queue_wait(&req->wait_queue);
1229 retry = true;
1230 break;
1231 }
1232 }
1233 } while (retry);
1234}
1235
Kevin Wolf756e6732010-01-12 12:55:17 +01001236/*
1237 * Return values:
1238 * 0 - success
1239 * -EINVAL - backing format specified, but no file
1240 * -ENOSPC - can't update the backing file because no space is left in the
1241 * image file header
1242 * -ENOTSUP - format driver doesn't support changing the backing file
1243 */
1244int bdrv_change_backing_file(BlockDriverState *bs,
1245 const char *backing_file, const char *backing_fmt)
1246{
1247 BlockDriver *drv = bs->drv;
1248
1249 if (drv->bdrv_change_backing_file != NULL) {
1250 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1251 } else {
1252 return -ENOTSUP;
1253 }
1254}
1255
aliguori71d07702009-03-03 17:37:16 +00001256static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1257 size_t size)
1258{
1259 int64_t len;
1260
1261 if (!bdrv_is_inserted(bs))
1262 return -ENOMEDIUM;
1263
1264 if (bs->growable)
1265 return 0;
1266
1267 len = bdrv_getlength(bs);
1268
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001269 if (offset < 0)
1270 return -EIO;
1271
1272 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001273 return -EIO;
1274
1275 return 0;
1276}
1277
1278static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1279 int nb_sectors)
1280{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001281 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1282 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001283}
1284
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001285typedef struct RwCo {
1286 BlockDriverState *bs;
1287 int64_t sector_num;
1288 int nb_sectors;
1289 QEMUIOVector *qiov;
1290 bool is_write;
1291 int ret;
1292} RwCo;
1293
1294static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1295{
1296 RwCo *rwco = opaque;
1297
1298 if (!rwco->is_write) {
1299 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001300 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001301 } else {
1302 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1303 rwco->nb_sectors, rwco->qiov);
1304 }
1305}
1306
1307/*
1308 * Process a synchronous request using coroutines
1309 */
1310static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1311 int nb_sectors, bool is_write)
1312{
1313 QEMUIOVector qiov;
1314 struct iovec iov = {
1315 .iov_base = (void *)buf,
1316 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1317 };
1318 Coroutine *co;
1319 RwCo rwco = {
1320 .bs = bs,
1321 .sector_num = sector_num,
1322 .nb_sectors = nb_sectors,
1323 .qiov = &qiov,
1324 .is_write = is_write,
1325 .ret = NOT_DONE,
1326 };
1327
1328 qemu_iovec_init_external(&qiov, &iov, 1);
1329
1330 if (qemu_in_coroutine()) {
1331 /* Fast-path if already in coroutine context */
1332 bdrv_rw_co_entry(&rwco);
1333 } else {
1334 co = qemu_coroutine_create(bdrv_rw_co_entry);
1335 qemu_coroutine_enter(co, &rwco);
1336 while (rwco.ret == NOT_DONE) {
1337 qemu_aio_wait();
1338 }
1339 }
1340 return rwco.ret;
1341}
1342
bellard19cb3732006-08-19 11:45:59 +00001343/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001344int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001345 uint8_t *buf, int nb_sectors)
1346{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001347 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001348}
1349
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001350static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001351 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001352{
1353 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001354 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001355
Jan Kiszka6ea44302009-11-30 18:21:19 +01001356 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001357 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001358
1359 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001360 idx = start / (sizeof(unsigned long) * 8);
1361 bit = start % (sizeof(unsigned long) * 8);
1362 val = bs->dirty_bitmap[idx];
1363 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001364 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001365 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001366 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001367 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001368 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001369 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001370 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001371 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001372 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001373 }
1374 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001375 }
1376}
1377
ths5fafdf22007-09-16 21:08:06 +00001378/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001379 -EIO generic I/O error (may happen for all errors)
1380 -ENOMEDIUM No media inserted.
1381 -EINVAL Invalid sector number or nb_sectors
1382 -EACCES Trying to write a read-only device
1383*/
ths5fafdf22007-09-16 21:08:06 +00001384int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001385 const uint8_t *buf, int nb_sectors)
1386{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001387 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001388}
1389
aliguorieda578e2009-03-12 19:57:16 +00001390int bdrv_pread(BlockDriverState *bs, int64_t offset,
1391 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001392{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001393 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001394 int len, nb_sectors, count;
1395 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001396 int ret;
bellard83f64092006-08-01 16:21:11 +00001397
1398 count = count1;
1399 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001400 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001401 if (len > count)
1402 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001403 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001404 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001405 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1406 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001407 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001408 count -= len;
1409 if (count == 0)
1410 return count1;
1411 sector_num++;
1412 buf += len;
1413 }
1414
1415 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001416 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001417 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001418 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1419 return ret;
bellard83f64092006-08-01 16:21:11 +00001420 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001421 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001422 buf += len;
1423 count -= len;
1424 }
1425
1426 /* add data from the last sector */
1427 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001428 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1429 return ret;
bellard83f64092006-08-01 16:21:11 +00001430 memcpy(buf, tmp_buf, count);
1431 }
1432 return count1;
1433}
1434
aliguorieda578e2009-03-12 19:57:16 +00001435int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1436 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001437{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001438 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001439 int len, nb_sectors, count;
1440 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001441 int ret;
bellard83f64092006-08-01 16:21:11 +00001442
1443 count = count1;
1444 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001445 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001446 if (len > count)
1447 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001448 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001449 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001450 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1451 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001452 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001453 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1454 return ret;
bellard83f64092006-08-01 16:21:11 +00001455 count -= len;
1456 if (count == 0)
1457 return count1;
1458 sector_num++;
1459 buf += len;
1460 }
1461
1462 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001463 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001464 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001465 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1466 return ret;
bellard83f64092006-08-01 16:21:11 +00001467 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001468 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001469 buf += len;
1470 count -= len;
1471 }
1472
1473 /* add data from the last sector */
1474 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001475 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1476 return ret;
bellard83f64092006-08-01 16:21:11 +00001477 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001478 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1479 return ret;
bellard83f64092006-08-01 16:21:11 +00001480 }
1481 return count1;
1482}
bellard83f64092006-08-01 16:21:11 +00001483
Kevin Wolff08145f2010-06-16 16:38:15 +02001484/*
1485 * Writes to the file and ensures that no writes are reordered across this
1486 * request (acts as a barrier)
1487 *
1488 * Returns 0 on success, -errno in error cases.
1489 */
1490int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1491 const void *buf, int count)
1492{
1493 int ret;
1494
1495 ret = bdrv_pwrite(bs, offset, buf, count);
1496 if (ret < 0) {
1497 return ret;
1498 }
1499
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001500 /* No flush needed for cache modes that use O_DSYNC */
1501 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001502 bdrv_flush(bs);
1503 }
1504
1505 return 0;
1506}
1507
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001508static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001509 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1510{
1511 /* Perform I/O through a temporary buffer so that users who scribble over
1512 * their read buffer while the operation is in progress do not end up
1513 * modifying the image file. This is critical for zero-copy guest I/O
1514 * where anything might happen inside guest memory.
1515 */
1516 void *bounce_buffer;
1517
1518 struct iovec iov;
1519 QEMUIOVector bounce_qiov;
1520 int64_t cluster_sector_num;
1521 int cluster_nb_sectors;
1522 size_t skip_bytes;
1523 int ret;
1524
1525 /* Cover entire cluster so no additional backing file I/O is required when
1526 * allocating cluster in the image file.
1527 */
1528 round_to_clusters(bs, sector_num, nb_sectors,
1529 &cluster_sector_num, &cluster_nb_sectors);
1530
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001531 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1532 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001533
1534 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1535 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1536 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1537
1538 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1539 &bounce_qiov);
1540 if (ret < 0) {
1541 goto err;
1542 }
1543
1544 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1545 &bounce_qiov);
1546 if (ret < 0) {
1547 /* It might be okay to ignore write errors for guest requests. If this
1548 * is a deliberate copy-on-read then we don't want to ignore the error.
1549 * Simply report it in all cases.
1550 */
1551 goto err;
1552 }
1553
1554 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1555 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1556 nb_sectors * BDRV_SECTOR_SIZE);
1557
1558err:
1559 qemu_vfree(bounce_buffer);
1560 return ret;
1561}
1562
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001563/*
1564 * Handle a read request in coroutine context
1565 */
1566static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001567 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1568 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001569{
1570 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001571 BdrvTrackedRequest req;
1572 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001573
Kevin Wolfda1fa912011-07-14 17:27:13 +02001574 if (!drv) {
1575 return -ENOMEDIUM;
1576 }
1577 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1578 return -EIO;
1579 }
1580
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001581 /* throttling disk read I/O */
1582 if (bs->io_limits_enabled) {
1583 bdrv_io_limits_intercept(bs, false, nb_sectors);
1584 }
1585
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001586 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001587 flags |= BDRV_REQ_COPY_ON_READ;
1588 }
1589 if (flags & BDRV_REQ_COPY_ON_READ) {
1590 bs->copy_on_read_in_flight++;
1591 }
1592
1593 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001594 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1595 }
1596
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001597 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001598
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001599 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001600 int pnum;
1601
1602 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1603 if (ret < 0) {
1604 goto out;
1605 }
1606
1607 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001608 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001609 goto out;
1610 }
1611 }
1612
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001613 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001614
1615out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001616 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001617
1618 if (flags & BDRV_REQ_COPY_ON_READ) {
1619 bs->copy_on_read_in_flight--;
1620 }
1621
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001622 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001623}
1624
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001625int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001626 int nb_sectors, QEMUIOVector *qiov)
1627{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001628 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001629
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001630 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1631}
1632
1633int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1634 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1635{
1636 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1637
1638 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1639 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001640}
1641
1642/*
1643 * Handle a write request in coroutine context
1644 */
1645static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1646 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1647{
1648 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001649 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001650 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001651
1652 if (!bs->drv) {
1653 return -ENOMEDIUM;
1654 }
1655 if (bs->read_only) {
1656 return -EACCES;
1657 }
1658 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1659 return -EIO;
1660 }
1661
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001662 /* throttling disk write I/O */
1663 if (bs->io_limits_enabled) {
1664 bdrv_io_limits_intercept(bs, true, nb_sectors);
1665 }
1666
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001667 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001668 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1669 }
1670
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001671 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1672
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001673 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1674
Kevin Wolfda1fa912011-07-14 17:27:13 +02001675 if (bs->dirty_bitmap) {
1676 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1677 }
1678
1679 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1680 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1681 }
1682
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001683 tracked_request_end(&req);
1684
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001685 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001686}
1687
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001688int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1689 int nb_sectors, QEMUIOVector *qiov)
1690{
1691 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1692
1693 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1694}
1695
bellard83f64092006-08-01 16:21:11 +00001696/**
bellard83f64092006-08-01 16:21:11 +00001697 * Truncate file to 'offset' bytes (needed only for file protocols)
1698 */
1699int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1700{
1701 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001702 int ret;
bellard83f64092006-08-01 16:21:11 +00001703 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001704 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001705 if (!drv->bdrv_truncate)
1706 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001707 if (bs->read_only)
1708 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001709 if (bdrv_in_use(bs))
1710 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001711 ret = drv->bdrv_truncate(bs, offset);
1712 if (ret == 0) {
1713 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001714 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001715 }
1716 return ret;
bellard83f64092006-08-01 16:21:11 +00001717}
1718
1719/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001720 * Length of a allocated file in bytes. Sparse files are counted by actual
1721 * allocated space. Return < 0 if error or unknown.
1722 */
1723int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1724{
1725 BlockDriver *drv = bs->drv;
1726 if (!drv) {
1727 return -ENOMEDIUM;
1728 }
1729 if (drv->bdrv_get_allocated_file_size) {
1730 return drv->bdrv_get_allocated_file_size(bs);
1731 }
1732 if (bs->file) {
1733 return bdrv_get_allocated_file_size(bs->file);
1734 }
1735 return -ENOTSUP;
1736}
1737
1738/**
bellard83f64092006-08-01 16:21:11 +00001739 * Length of a file in bytes. Return < 0 if error or unknown.
1740 */
1741int64_t bdrv_getlength(BlockDriverState *bs)
1742{
1743 BlockDriver *drv = bs->drv;
1744 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001745 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001746
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001747 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001748 if (drv->bdrv_getlength) {
1749 return drv->bdrv_getlength(bs);
1750 }
bellard83f64092006-08-01 16:21:11 +00001751 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001752 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001753}
1754
bellard19cb3732006-08-19 11:45:59 +00001755/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001756void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001757{
bellard19cb3732006-08-19 11:45:59 +00001758 int64_t length;
1759 length = bdrv_getlength(bs);
1760 if (length < 0)
1761 length = 0;
1762 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001763 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001764 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001765}
bellardcf989512004-02-16 21:56:36 +00001766
/*
 * One MSDOS partition table entry, overlaid directly on the bytes of the
 * first sector by guess_disk_lchs(). The layout is packed; nr_sects is
 * converted with le32_to_cpu() by its reader.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001779
1780/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1781static int guess_disk_lchs(BlockDriverState *bs,
1782 int *pcylinders, int *pheads, int *psectors)
1783{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001784 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001785 int ret, i, heads, sectors, cylinders;
1786 struct partition *p;
1787 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001788 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001789
1790 bdrv_get_geometry(bs, &nb_sectors);
1791
1792 ret = bdrv_read(bs, 0, buf, 1);
1793 if (ret < 0)
1794 return -1;
1795 /* test msdos magic */
1796 if (buf[510] != 0x55 || buf[511] != 0xaa)
1797 return -1;
1798 for(i = 0; i < 4; i++) {
1799 p = ((struct partition *)(buf + 0x1be)) + i;
1800 nr_sects = le32_to_cpu(p->nr_sects);
1801 if (nr_sects && p->end_head) {
1802 /* We make the assumption that the partition terminates on
1803 a cylinder boundary */
1804 heads = p->end_head + 1;
1805 sectors = p->end_sector & 63;
1806 if (sectors == 0)
1807 continue;
1808 cylinders = nb_sectors / (heads * sectors);
1809 if (cylinders < 1 || cylinders > 16383)
1810 continue;
1811 *pheads = heads;
1812 *psectors = sectors;
1813 *pcylinders = cylinders;
1814#if 0
1815 printf("guessed geometry: LCHS=%d %d %d\n",
1816 cylinders, heads, sectors);
1817#endif
1818 return 0;
1819 }
1820 }
1821 return -1;
1822}
1823
1824void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1825{
1826 int translation, lba_detected = 0;
1827 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001828 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001829
1830 /* if a geometry hint is available, use it */
1831 bdrv_get_geometry(bs, &nb_sectors);
1832 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1833 translation = bdrv_get_translation_hint(bs);
1834 if (cylinders != 0) {
1835 *pcyls = cylinders;
1836 *pheads = heads;
1837 *psecs = secs;
1838 } else {
1839 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1840 if (heads > 16) {
1841 /* if heads > 16, it means that a BIOS LBA
1842 translation was active, so the default
1843 hardware geometry is OK */
1844 lba_detected = 1;
1845 goto default_geometry;
1846 } else {
1847 *pcyls = cylinders;
1848 *pheads = heads;
1849 *psecs = secs;
1850 /* disable any translation to be in sync with
1851 the logical geometry */
1852 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1853 bdrv_set_translation_hint(bs,
1854 BIOS_ATA_TRANSLATION_NONE);
1855 }
1856 }
1857 } else {
1858 default_geometry:
1859 /* if no geometry, use a standard physical disk geometry */
1860 cylinders = nb_sectors / (16 * 63);
1861
1862 if (cylinders > 16383)
1863 cylinders = 16383;
1864 else if (cylinders < 2)
1865 cylinders = 2;
1866 *pcyls = cylinders;
1867 *pheads = 16;
1868 *psecs = 63;
1869 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1870 if ((*pcyls * *pheads) <= 131072) {
1871 bdrv_set_translation_hint(bs,
1872 BIOS_ATA_TRANSLATION_LARGE);
1873 } else {
1874 bdrv_set_translation_hint(bs,
1875 BIOS_ATA_TRANSLATION_LBA);
1876 }
1877 }
1878 }
1879 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1880 }
1881}
1882
ths5fafdf22007-09-16 21:08:06 +00001883void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001884 int cyls, int heads, int secs)
1885{
1886 bs->cyls = cyls;
1887 bs->heads = heads;
1888 bs->secs = secs;
1889}
1890
bellard46d47672004-11-16 01:45:27 +00001891void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1892{
1893 bs->translation = translation;
1894}
1895
ths5fafdf22007-09-16 21:08:06 +00001896void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001897 int *pcyls, int *pheads, int *psecs)
1898{
1899 *pcyls = bs->cyls;
1900 *pheads = bs->heads;
1901 *psecs = bs->secs;
1902}
1903
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001904/* throttling disk io limits */
1905void bdrv_set_io_limits(BlockDriverState *bs,
1906 BlockIOLimit *io_limits)
1907{
1908 bs->io_limits = *io_limits;
1909 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1910}
1911
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001912/* Recognize floppy formats */
1913typedef struct FDFormat {
1914 FDriveType drive;
1915 uint8_t last_sect;
1916 uint8_t max_track;
1917 uint8_t max_head;
1918} FDFormat;
1919
1920static const FDFormat fd_formats[] = {
1921 /* First entry is default format */
1922 /* 1.44 MB 3"1/2 floppy disks */
1923 { FDRIVE_DRV_144, 18, 80, 1, },
1924 { FDRIVE_DRV_144, 20, 80, 1, },
1925 { FDRIVE_DRV_144, 21, 80, 1, },
1926 { FDRIVE_DRV_144, 21, 82, 1, },
1927 { FDRIVE_DRV_144, 21, 83, 1, },
1928 { FDRIVE_DRV_144, 22, 80, 1, },
1929 { FDRIVE_DRV_144, 23, 80, 1, },
1930 { FDRIVE_DRV_144, 24, 80, 1, },
1931 /* 2.88 MB 3"1/2 floppy disks */
1932 { FDRIVE_DRV_288, 36, 80, 1, },
1933 { FDRIVE_DRV_288, 39, 80, 1, },
1934 { FDRIVE_DRV_288, 40, 80, 1, },
1935 { FDRIVE_DRV_288, 44, 80, 1, },
1936 { FDRIVE_DRV_288, 48, 80, 1, },
1937 /* 720 kB 3"1/2 floppy disks */
1938 { FDRIVE_DRV_144, 9, 80, 1, },
1939 { FDRIVE_DRV_144, 10, 80, 1, },
1940 { FDRIVE_DRV_144, 10, 82, 1, },
1941 { FDRIVE_DRV_144, 10, 83, 1, },
1942 { FDRIVE_DRV_144, 13, 80, 1, },
1943 { FDRIVE_DRV_144, 14, 80, 1, },
1944 /* 1.2 MB 5"1/4 floppy disks */
1945 { FDRIVE_DRV_120, 15, 80, 1, },
1946 { FDRIVE_DRV_120, 18, 80, 1, },
1947 { FDRIVE_DRV_120, 18, 82, 1, },
1948 { FDRIVE_DRV_120, 18, 83, 1, },
1949 { FDRIVE_DRV_120, 20, 80, 1, },
1950 /* 720 kB 5"1/4 floppy disks */
1951 { FDRIVE_DRV_120, 9, 80, 1, },
1952 { FDRIVE_DRV_120, 11, 80, 1, },
1953 /* 360 kB 5"1/4 floppy disks */
1954 { FDRIVE_DRV_120, 9, 40, 1, },
1955 { FDRIVE_DRV_120, 9, 40, 0, },
1956 { FDRIVE_DRV_120, 10, 41, 1, },
1957 { FDRIVE_DRV_120, 10, 42, 1, },
1958 /* 320 kB 5"1/4 floppy disks */
1959 { FDRIVE_DRV_120, 8, 40, 1, },
1960 { FDRIVE_DRV_120, 8, 40, 0, },
1961 /* 360 kB must match 5"1/4 better than 3"1/2... */
1962 { FDRIVE_DRV_144, 9, 80, 0, },
1963 /* end */
1964 { FDRIVE_DRV_NONE, -1, -1, 0, },
1965};
1966
1967void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1968 int *max_track, int *last_sect,
1969 FDriveType drive_in, FDriveType *drive)
1970{
1971 const FDFormat *parse;
1972 uint64_t nb_sectors, size;
1973 int i, first_match, match;
1974
1975 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1976 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1977 /* User defined disk */
1978 } else {
1979 bdrv_get_geometry(bs, &nb_sectors);
1980 match = -1;
1981 first_match = -1;
1982 for (i = 0; ; i++) {
1983 parse = &fd_formats[i];
1984 if (parse->drive == FDRIVE_DRV_NONE) {
1985 break;
1986 }
1987 if (drive_in == parse->drive ||
1988 drive_in == FDRIVE_DRV_NONE) {
1989 size = (parse->max_head + 1) * parse->max_track *
1990 parse->last_sect;
1991 if (nb_sectors == size) {
1992 match = i;
1993 break;
1994 }
1995 if (first_match == -1) {
1996 first_match = i;
1997 }
1998 }
1999 }
2000 if (match == -1) {
2001 if (first_match == -1) {
2002 match = 1;
2003 } else {
2004 match = first_match;
2005 }
2006 parse = &fd_formats[match];
2007 }
2008 *nb_heads = parse->max_head + 1;
2009 *max_track = parse->max_track;
2010 *last_sect = parse->last_sect;
2011 *drive = parse->drive;
2012 }
2013}
2014
bellard46d47672004-11-16 01:45:27 +00002015int bdrv_get_translation_hint(BlockDriverState *bs)
2016{
2017 return bs->translation;
2018}
2019
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002020void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2021 BlockErrorAction on_write_error)
2022{
2023 bs->on_read_error = on_read_error;
2024 bs->on_write_error = on_write_error;
2025}
2026
2027BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2028{
2029 return is_read ? bs->on_read_error : bs->on_write_error;
2030}
2031
bellardb3380822004-03-14 21:38:54 +00002032int bdrv_is_read_only(BlockDriverState *bs)
2033{
2034 return bs->read_only;
2035}
2036
ths985a03b2007-12-24 16:10:43 +00002037int bdrv_is_sg(BlockDriverState *bs)
2038{
2039 return bs->sg;
2040}
2041
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002042int bdrv_enable_write_cache(BlockDriverState *bs)
2043{
2044 return bs->enable_write_cache;
2045}
2046
bellardea2384d2004-08-01 21:59:26 +00002047int bdrv_is_encrypted(BlockDriverState *bs)
2048{
2049 if (bs->backing_hd && bs->backing_hd->encrypted)
2050 return 1;
2051 return bs->encrypted;
2052}
2053
aliguoric0f4ce72009-03-05 23:01:01 +00002054int bdrv_key_required(BlockDriverState *bs)
2055{
2056 BlockDriverState *backing_hd = bs->backing_hd;
2057
2058 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2059 return 1;
2060 return (bs->encrypted && !bs->valid_key);
2061}
2062
bellardea2384d2004-08-01 21:59:26 +00002063int bdrv_set_key(BlockDriverState *bs, const char *key)
2064{
2065 int ret;
2066 if (bs->backing_hd && bs->backing_hd->encrypted) {
2067 ret = bdrv_set_key(bs->backing_hd, key);
2068 if (ret < 0)
2069 return ret;
2070 if (!bs->encrypted)
2071 return 0;
2072 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002073 if (!bs->encrypted) {
2074 return -EINVAL;
2075 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2076 return -ENOMEDIUM;
2077 }
aliguoric0f4ce72009-03-05 23:01:01 +00002078 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002079 if (ret < 0) {
2080 bs->valid_key = 0;
2081 } else if (!bs->valid_key) {
2082 bs->valid_key = 1;
2083 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002084 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002085 }
aliguoric0f4ce72009-03-05 23:01:01 +00002086 return ret;
bellardea2384d2004-08-01 21:59:26 +00002087}
2088
2089void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2090{
bellard19cb3732006-08-19 11:45:59 +00002091 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002092 buf[0] = '\0';
2093 } else {
2094 pstrcpy(buf, buf_size, bs->drv->format_name);
2095 }
2096}
2097
ths5fafdf22007-09-16 21:08:06 +00002098void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002099 void *opaque)
2100{
2101 BlockDriver *drv;
2102
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002103 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002104 it(opaque, drv->format_name);
2105 }
2106}
2107
bellardb3380822004-03-14 21:38:54 +00002108BlockDriverState *bdrv_find(const char *name)
2109{
2110 BlockDriverState *bs;
2111
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002112 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2113 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002114 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002115 }
bellardb3380822004-03-14 21:38:54 +00002116 }
2117 return NULL;
2118}
2119
Markus Armbruster2f399b02010-06-02 18:55:20 +02002120BlockDriverState *bdrv_next(BlockDriverState *bs)
2121{
2122 if (!bs) {
2123 return QTAILQ_FIRST(&bdrv_states);
2124 }
2125 return QTAILQ_NEXT(bs, list);
2126}
2127
aliguori51de9762009-03-05 23:00:43 +00002128void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002129{
2130 BlockDriverState *bs;
2131
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002132 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002133 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002134 }
2135}
2136
bellardea2384d2004-08-01 21:59:26 +00002137const char *bdrv_get_device_name(BlockDriverState *bs)
2138{
2139 return bs->device_name;
2140}
2141
aliguoric6ca28d2008-10-06 13:55:43 +00002142void bdrv_flush_all(void)
2143{
2144 BlockDriverState *bs;
2145
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002146 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002147 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002148 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002149 }
2150 }
aliguoric6ca28d2008-10-06 13:55:43 +00002151}
2152
Kevin Wolff2feebb2010-04-14 17:30:35 +02002153int bdrv_has_zero_init(BlockDriverState *bs)
2154{
2155 assert(bs->drv);
2156
Kevin Wolf336c1c12010-07-28 11:26:29 +02002157 if (bs->drv->bdrv_has_zero_init) {
2158 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002159 }
2160
2161 return 1;
2162}
2163
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002164typedef struct BdrvCoIsAllocatedData {
2165 BlockDriverState *bs;
2166 int64_t sector_num;
2167 int nb_sectors;
2168 int *pnum;
2169 int ret;
2170 bool done;
2171} BdrvCoIsAllocatedData;
2172
thsf58c7b32008-06-05 21:53:49 +00002173/*
2174 * Returns true iff the specified sector is present in the disk image. Drivers
2175 * not implementing the functionality are assumed to not support backing files,
2176 * hence all their sectors are reported as allocated.
2177 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002178 * If 'sector_num' is beyond the end of the disk image the return value is 0
2179 * and 'pnum' is set to 0.
2180 *
thsf58c7b32008-06-05 21:53:49 +00002181 * 'pnum' is set to the number of sectors (including and immediately following
2182 * the specified sector) that are known to be in the same
2183 * allocated/unallocated state.
2184 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002185 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2186 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002187 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002188int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2189 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002190{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002191 int64_t n;
2192
2193 if (sector_num >= bs->total_sectors) {
2194 *pnum = 0;
2195 return 0;
2196 }
2197
2198 n = bs->total_sectors - sector_num;
2199 if (n < nb_sectors) {
2200 nb_sectors = n;
2201 }
2202
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002203 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002204 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002205 return 1;
2206 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002207
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002208 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2209}
2210
2211/* Coroutine wrapper for bdrv_is_allocated() */
2212static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2213{
2214 BdrvCoIsAllocatedData *data = opaque;
2215 BlockDriverState *bs = data->bs;
2216
2217 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2218 data->pnum);
2219 data->done = true;
2220}
2221
2222/*
2223 * Synchronous wrapper around bdrv_co_is_allocated().
2224 *
2225 * See bdrv_co_is_allocated() for details.
2226 */
2227int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2228 int *pnum)
2229{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002230 Coroutine *co;
2231 BdrvCoIsAllocatedData data = {
2232 .bs = bs,
2233 .sector_num = sector_num,
2234 .nb_sectors = nb_sectors,
2235 .pnum = pnum,
2236 .done = false,
2237 };
2238
2239 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2240 qemu_coroutine_enter(co, &data);
2241 while (!data.done) {
2242 qemu_aio_wait();
2243 }
2244 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002245}
2246
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002247void bdrv_mon_event(const BlockDriverState *bdrv,
2248 BlockMonEventAction action, int is_read)
2249{
2250 QObject *data;
2251 const char *action_str;
2252
2253 switch (action) {
2254 case BDRV_ACTION_REPORT:
2255 action_str = "report";
2256 break;
2257 case BDRV_ACTION_IGNORE:
2258 action_str = "ignore";
2259 break;
2260 case BDRV_ACTION_STOP:
2261 action_str = "stop";
2262 break;
2263 default:
2264 abort();
2265 }
2266
2267 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2268 bdrv->device_name,
2269 action_str,
2270 is_read ? "read" : "write");
2271 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2272
2273 qobject_decref(data);
2274}
2275
Luiz Capitulinob2023812011-09-21 17:16:47 -03002276BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002277{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002278 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002279 BlockDriverState *bs;
2280
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002281 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002282 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002283
Luiz Capitulinob2023812011-09-21 17:16:47 -03002284 info->value = g_malloc0(sizeof(*info->value));
2285 info->value->device = g_strdup(bs->device_name);
2286 info->value->type = g_strdup("unknown");
2287 info->value->locked = bdrv_dev_is_medium_locked(bs);
2288 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002289
Markus Armbrustere4def802011-09-06 18:58:53 +02002290 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002291 info->value->has_tray_open = true;
2292 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002293 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002294
2295 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002296 info->value->has_io_status = true;
2297 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002298 }
2299
bellard19cb3732006-08-19 11:45:59 +00002300 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002301 info->value->has_inserted = true;
2302 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2303 info->value->inserted->file = g_strdup(bs->filename);
2304 info->value->inserted->ro = bs->read_only;
2305 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2306 info->value->inserted->encrypted = bs->encrypted;
2307 if (bs->backing_file[0]) {
2308 info->value->inserted->has_backing_file = true;
2309 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002310 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002311
2312 if (bs->io_limits_enabled) {
2313 info->value->inserted->bps =
2314 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2315 info->value->inserted->bps_rd =
2316 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2317 info->value->inserted->bps_wr =
2318 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2319 info->value->inserted->iops =
2320 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2321 info->value->inserted->iops_rd =
2322 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2323 info->value->inserted->iops_wr =
2324 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2325 }
bellardb3380822004-03-14 21:38:54 +00002326 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002327
2328 /* XXX: waiting for the qapi to support GSList */
2329 if (!cur_item) {
2330 head = cur_item = info;
2331 } else {
2332 cur_item->next = info;
2333 cur_item = info;
2334 }
bellardb3380822004-03-14 21:38:54 +00002335 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002336
Luiz Capitulinob2023812011-09-21 17:16:47 -03002337 return head;
bellardb3380822004-03-14 21:38:54 +00002338}
thsa36e69d2007-12-02 05:18:19 +00002339
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002340/* Consider exposing this as a full fledged QMP command */
2341static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002342{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002343 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002344
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002345 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002346
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002347 if (bs->device_name[0]) {
2348 s->has_device = true;
2349 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002350 }
2351
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002352 s->stats = g_malloc0(sizeof(*s->stats));
2353 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2354 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2355 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2356 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2357 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2358 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2359 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2360 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2361 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2362
Kevin Wolf294cc352010-04-28 14:34:01 +02002363 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002364 s->has_parent = true;
2365 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002366 }
2367
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002368 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002369}
2370
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002371BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002372{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002373 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002374 BlockDriverState *bs;
2375
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002376 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002377 BlockStatsList *info = g_malloc0(sizeof(*info));
2378 info->value = qmp_query_blockstat(bs, NULL);
2379
2380 /* XXX: waiting for the qapi to support GSList */
2381 if (!cur_item) {
2382 head = cur_item = info;
2383 } else {
2384 cur_item->next = info;
2385 cur_item = info;
2386 }
thsa36e69d2007-12-02 05:18:19 +00002387 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002388
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002389 return head;
thsa36e69d2007-12-02 05:18:19 +00002390}
bellardea2384d2004-08-01 21:59:26 +00002391
aliguori045df332009-03-05 23:00:48 +00002392const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2393{
2394 if (bs->backing_hd && bs->backing_hd->encrypted)
2395 return bs->backing_file;
2396 else if (bs->encrypted)
2397 return bs->filename;
2398 else
2399 return NULL;
2400}
2401
ths5fafdf22007-09-16 21:08:06 +00002402void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002403 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002404{
Kevin Wolf3574c602011-10-26 11:02:11 +02002405 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002406}
2407
ths5fafdf22007-09-16 21:08:06 +00002408int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002409 const uint8_t *buf, int nb_sectors)
2410{
2411 BlockDriver *drv = bs->drv;
2412 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002413 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002414 if (!drv->bdrv_write_compressed)
2415 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002416 if (bdrv_check_request(bs, sector_num, nb_sectors))
2417 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002418
Jan Kiszkac6d22832009-11-30 18:21:20 +01002419 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002420 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2421 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002422
bellardfaea38e2006-08-05 21:31:00 +00002423 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2424}
ths3b46e622007-09-17 08:09:54 +00002425
bellardfaea38e2006-08-05 21:31:00 +00002426int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2427{
2428 BlockDriver *drv = bs->drv;
2429 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002430 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002431 if (!drv->bdrv_get_info)
2432 return -ENOTSUP;
2433 memset(bdi, 0, sizeof(*bdi));
2434 return drv->bdrv_get_info(bs, bdi);
2435}
2436
Christoph Hellwig45566e92009-07-10 23:11:57 +02002437int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2438 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002439{
2440 BlockDriver *drv = bs->drv;
2441 if (!drv)
2442 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002443 if (drv->bdrv_save_vmstate)
2444 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2445 if (bs->file)
2446 return bdrv_save_vmstate(bs->file, buf, pos, size);
2447 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002448}
2449
Christoph Hellwig45566e92009-07-10 23:11:57 +02002450int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2451 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002452{
2453 BlockDriver *drv = bs->drv;
2454 if (!drv)
2455 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002456 if (drv->bdrv_load_vmstate)
2457 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2458 if (bs->file)
2459 return bdrv_load_vmstate(bs->file, buf, pos, size);
2460 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002461}
2462
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002463void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2464{
2465 BlockDriver *drv = bs->drv;
2466
2467 if (!drv || !drv->bdrv_debug_event) {
2468 return;
2469 }
2470
2471 return drv->bdrv_debug_event(bs, event);
2472
2473}
2474
/**************************************************************/
/* handling of snapshots */
2477
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002478int bdrv_can_snapshot(BlockDriverState *bs)
2479{
2480 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002481 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002482 return 0;
2483 }
2484
2485 if (!drv->bdrv_snapshot_create) {
2486 if (bs->file != NULL) {
2487 return bdrv_can_snapshot(bs->file);
2488 }
2489 return 0;
2490 }
2491
2492 return 1;
2493}
2494
Blue Swirl199630b2010-07-25 20:49:34 +00002495int bdrv_is_snapshot(BlockDriverState *bs)
2496{
2497 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2498}
2499
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002500BlockDriverState *bdrv_snapshots(void)
2501{
2502 BlockDriverState *bs;
2503
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002504 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002505 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002506 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002507
2508 bs = NULL;
2509 while ((bs = bdrv_next(bs))) {
2510 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002511 bs_snapshots = bs;
2512 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002513 }
2514 }
2515 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002516}
2517
ths5fafdf22007-09-16 21:08:06 +00002518int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002519 QEMUSnapshotInfo *sn_info)
2520{
2521 BlockDriver *drv = bs->drv;
2522 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002523 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002524 if (drv->bdrv_snapshot_create)
2525 return drv->bdrv_snapshot_create(bs, sn_info);
2526 if (bs->file)
2527 return bdrv_snapshot_create(bs->file, sn_info);
2528 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002529}
2530
ths5fafdf22007-09-16 21:08:06 +00002531int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002532 const char *snapshot_id)
2533{
2534 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002535 int ret, open_ret;
2536
bellardfaea38e2006-08-05 21:31:00 +00002537 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002538 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002539 if (drv->bdrv_snapshot_goto)
2540 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2541
2542 if (bs->file) {
2543 drv->bdrv_close(bs);
2544 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2545 open_ret = drv->bdrv_open(bs, bs->open_flags);
2546 if (open_ret < 0) {
2547 bdrv_delete(bs->file);
2548 bs->drv = NULL;
2549 return open_ret;
2550 }
2551 return ret;
2552 }
2553
2554 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002555}
2556
2557int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2558{
2559 BlockDriver *drv = bs->drv;
2560 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002561 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002562 if (drv->bdrv_snapshot_delete)
2563 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2564 if (bs->file)
2565 return bdrv_snapshot_delete(bs->file, snapshot_id);
2566 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002567}
2568
ths5fafdf22007-09-16 21:08:06 +00002569int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002570 QEMUSnapshotInfo **psn_info)
2571{
2572 BlockDriver *drv = bs->drv;
2573 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002574 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002575 if (drv->bdrv_snapshot_list)
2576 return drv->bdrv_snapshot_list(bs, psn_info);
2577 if (bs->file)
2578 return bdrv_snapshot_list(bs->file, psn_info);
2579 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002580}
2581
edison51ef6722010-09-21 19:58:41 -07002582int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2583 const char *snapshot_name)
2584{
2585 BlockDriver *drv = bs->drv;
2586 if (!drv) {
2587 return -ENOMEDIUM;
2588 }
2589 if (!bs->read_only) {
2590 return -EINVAL;
2591 }
2592 if (drv->bdrv_snapshot_load_tmp) {
2593 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2594 }
2595 return -ENOTSUP;
2596}
2597
#define NB_SUFFIXES 4

/*
 * Format @size as a short human-readable string in @buf (at most
 * @buf_size bytes including the terminating NUL) and return @buf.
 *
 * Values up to 999 (including negative ones) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and suffixed with
 * K, M, G or T: one decimal place is used while the scaled value is below
 * 10, a rounded integer otherwise.  Values too large for the T range are
 * still printed with the T suffix.
 *
 * A non-positive @buf_size leaves @buf untouched; passing a negative
 * value on to snprintf() would turn it into a huge unsigned size.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t base = 1024;
    int i;

    if (buf_size <= 0) {
        return buf;
    }

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        }
        /* The last suffix absorbs everything that is larger. */
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base),
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }

    return buf;
}
2627
2628char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2629{
2630 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002631#ifdef _WIN32
2632 struct tm *ptm;
2633#else
bellardfaea38e2006-08-05 21:31:00 +00002634 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002635#endif
bellardfaea38e2006-08-05 21:31:00 +00002636 time_t ti;
2637 int64_t secs;
2638
2639 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002640 snprintf(buf, buf_size,
2641 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002642 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2643 } else {
2644 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002645#ifdef _WIN32
2646 ptm = localtime(&ti);
2647 strftime(date_buf, sizeof(date_buf),
2648 "%Y-%m-%d %H:%M:%S", ptm);
2649#else
bellardfaea38e2006-08-05 21:31:00 +00002650 localtime_r(&ti, &tm);
2651 strftime(date_buf, sizeof(date_buf),
2652 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002653#endif
bellardfaea38e2006-08-05 21:31:00 +00002654 secs = sn->vm_clock_nsec / 1000000000;
2655 snprintf(clock_buf, sizeof(clock_buf),
2656 "%02d:%02d:%02d.%03d",
2657 (int)(secs / 3600),
2658 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002659 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002660 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2661 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002662 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002663 sn->id_str, sn->name,
2664 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2665 date_buf,
2666 clock_buf);
2667 }
2668 return buf;
2669}
2670
/**************************************************************/
/* async I/Os */
2673
aliguori3b69e4b2009-01-22 16:59:24 +00002674BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002675 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002676 BlockDriverCompletionFunc *cb, void *opaque)
2677{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002678 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2679
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002680 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002681 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002682}
2683
aliguorif141eaf2009-04-07 18:43:24 +00002684BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2685 QEMUIOVector *qiov, int nb_sectors,
2686 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002687{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002688 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2689
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002690 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002691 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002692}
2693
Kevin Wolf40b4f532009-09-09 17:53:37 +02002694
2695typedef struct MultiwriteCB {
2696 int error;
2697 int num_requests;
2698 int num_callbacks;
2699 struct {
2700 BlockDriverCompletionFunc *cb;
2701 void *opaque;
2702 QEMUIOVector *free_qiov;
2703 void *free_buf;
2704 } callbacks[];
2705} MultiwriteCB;
2706
2707static void multiwrite_user_cb(MultiwriteCB *mcb)
2708{
2709 int i;
2710
2711 for (i = 0; i < mcb->num_callbacks; i++) {
2712 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002713 if (mcb->callbacks[i].free_qiov) {
2714 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2715 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002716 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002717 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002718 }
2719}
2720
2721static void multiwrite_cb(void *opaque, int ret)
2722{
2723 MultiwriteCB *mcb = opaque;
2724
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002725 trace_multiwrite_cb(mcb, ret);
2726
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002727 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002728 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002729 }
2730
2731 mcb->num_requests--;
2732 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002733 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002734 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002735 }
2736}
2737
2738static int multiwrite_req_compare(const void *a, const void *b)
2739{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002740 const BlockRequest *req1 = a, *req2 = b;
2741
2742 /*
2743 * Note that we can't simply subtract req2->sector from req1->sector
2744 * here as that could overflow the return value.
2745 */
2746 if (req1->sector > req2->sector) {
2747 return 1;
2748 } else if (req1->sector < req2->sector) {
2749 return -1;
2750 } else {
2751 return 0;
2752 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002753}
2754
2755/*
2756 * Takes a bunch of requests and tries to merge them. Returns the number of
2757 * requests that remain after merging.
2758 */
2759static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2760 int num_reqs, MultiwriteCB *mcb)
2761{
2762 int i, outidx;
2763
2764 // Sort requests by start sector
2765 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2766
2767 // Check if adjacent requests touch the same clusters. If so, combine them,
2768 // filling up gaps with zero sectors.
2769 outidx = 0;
2770 for (i = 1; i < num_reqs; i++) {
2771 int merge = 0;
2772 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2773
2774 // This handles the cases that are valid for all block drivers, namely
2775 // exactly sequential writes and overlapping writes.
2776 if (reqs[i].sector <= oldreq_last) {
2777 merge = 1;
2778 }
2779
2780 // The block driver may decide that it makes sense to combine requests
2781 // even if there is a gap of some sectors between them. In this case,
2782 // the gap is filled with zeros (therefore only applicable for yet
2783 // unused space in format like qcow2).
2784 if (!merge && bs->drv->bdrv_merge_requests) {
2785 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2786 }
2787
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002788 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2789 merge = 0;
2790 }
2791
Kevin Wolf40b4f532009-09-09 17:53:37 +02002792 if (merge) {
2793 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002794 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002795 qemu_iovec_init(qiov,
2796 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2797
2798 // Add the first request to the merged one. If the requests are
2799 // overlapping, drop the last sectors of the first request.
2800 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2801 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2802
2803 // We might need to add some zeros between the two requests
2804 if (reqs[i].sector > oldreq_last) {
2805 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2806 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2807 memset(buf, 0, zero_bytes);
2808 qemu_iovec_add(qiov, buf, zero_bytes);
2809 mcb->callbacks[i].free_buf = buf;
2810 }
2811
2812 // Add the second request
2813 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2814
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002815 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002816 reqs[outidx].qiov = qiov;
2817
2818 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2819 } else {
2820 outidx++;
2821 reqs[outidx].sector = reqs[i].sector;
2822 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2823 reqs[outidx].qiov = reqs[i].qiov;
2824 }
2825 }
2826
2827 return outidx + 1;
2828}
2829
2830/*
2831 * Submit multiple AIO write requests at once.
2832 *
2833 * On success, the function returns 0 and all requests in the reqs array have
2834 * been submitted. In error case this function returns -1, and any of the
2835 * requests may or may not be submitted yet. In particular, this means that the
2836 * callback will be called for some of the requests, for others it won't. The
2837 * caller must check the error field of the BlockRequest to wait for the right
2838 * callbacks (if error != 0, no callback will be called).
2839 *
2840 * The implementation may modify the contents of the reqs array, e.g. to merge
2841 * requests. However, the fields opaque and error are left unmodified as they
2842 * are used to signal failure for a single request to the caller.
2843 */
2844int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2845{
Kevin Wolf40b4f532009-09-09 17:53:37 +02002846 MultiwriteCB *mcb;
2847 int i;
2848
Ryan Harper301db7c2011-03-07 10:01:04 -06002849 /* don't submit writes if we don't have a medium */
2850 if (bs->drv == NULL) {
2851 for (i = 0; i < num_reqs; i++) {
2852 reqs[i].error = -ENOMEDIUM;
2853 }
2854 return -1;
2855 }
2856
Kevin Wolf40b4f532009-09-09 17:53:37 +02002857 if (num_reqs == 0) {
2858 return 0;
2859 }
2860
2861 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002862 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002863 mcb->num_requests = 0;
2864 mcb->num_callbacks = num_reqs;
2865
2866 for (i = 0; i < num_reqs; i++) {
2867 mcb->callbacks[i].cb = reqs[i].cb;
2868 mcb->callbacks[i].opaque = reqs[i].opaque;
2869 }
2870
2871 // Check for mergable requests
2872 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2873
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002874 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2875
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01002876 /* Run the aio requests. */
2877 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002878 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01002879 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02002880 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002881 }
2882
2883 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002884}
2885
bellard83f64092006-08-01 16:21:11 +00002886void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002887{
aliguori6bbff9a2009-03-20 18:25:59 +00002888 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002889}
2890
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002891/* block I/O throttling */
2892static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2893 bool is_write, double elapsed_time, uint64_t *wait)
2894{
2895 uint64_t bps_limit = 0;
2896 double bytes_limit, bytes_base, bytes_res;
2897 double slice_time, wait_time;
2898
2899 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2900 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2901 } else if (bs->io_limits.bps[is_write]) {
2902 bps_limit = bs->io_limits.bps[is_write];
2903 } else {
2904 if (wait) {
2905 *wait = 0;
2906 }
2907
2908 return false;
2909 }
2910
2911 slice_time = bs->slice_end - bs->slice_start;
2912 slice_time /= (NANOSECONDS_PER_SECOND);
2913 bytes_limit = bps_limit * slice_time;
2914 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2915 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2916 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2917 }
2918
2919 /* bytes_base: the bytes of data which have been read/written; and
2920 * it is obtained from the history statistic info.
2921 * bytes_res: the remaining bytes of data which need to be read/written.
2922 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2923 * the total time for completing reading/writting all data.
2924 */
2925 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2926
2927 if (bytes_base + bytes_res <= bytes_limit) {
2928 if (wait) {
2929 *wait = 0;
2930 }
2931
2932 return false;
2933 }
2934
2935 /* Calc approx time to dispatch */
2936 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2937
2938 /* When the I/O rate at runtime exceeds the limits,
2939 * bs->slice_end need to be extended in order that the current statistic
2940 * info can be kept until the timer fire, so it is increased and tuned
2941 * based on the result of experiment.
2942 */
2943 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2944 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2945 if (wait) {
2946 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2947 }
2948
2949 return true;
2950}
2951
2952static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2953 double elapsed_time, uint64_t *wait)
2954{
2955 uint64_t iops_limit = 0;
2956 double ios_limit, ios_base;
2957 double slice_time, wait_time;
2958
2959 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2960 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2961 } else if (bs->io_limits.iops[is_write]) {
2962 iops_limit = bs->io_limits.iops[is_write];
2963 } else {
2964 if (wait) {
2965 *wait = 0;
2966 }
2967
2968 return false;
2969 }
2970
2971 slice_time = bs->slice_end - bs->slice_start;
2972 slice_time /= (NANOSECONDS_PER_SECOND);
2973 ios_limit = iops_limit * slice_time;
2974 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2975 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2976 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2977 }
2978
2979 if (ios_base + 1 <= ios_limit) {
2980 if (wait) {
2981 *wait = 0;
2982 }
2983
2984 return false;
2985 }
2986
2987 /* Calc approx time to dispatch */
2988 wait_time = (ios_base + 1) / iops_limit;
2989 if (wait_time > elapsed_time) {
2990 wait_time = wait_time - elapsed_time;
2991 } else {
2992 wait_time = 0;
2993 }
2994
2995 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2996 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2997 if (wait) {
2998 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2999 }
3000
3001 return true;
3002}
3003
3004static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3005 bool is_write, int64_t *wait)
3006{
3007 int64_t now, max_wait;
3008 uint64_t bps_wait = 0, iops_wait = 0;
3009 double elapsed_time;
3010 int bps_ret, iops_ret;
3011
3012 now = qemu_get_clock_ns(vm_clock);
3013 if ((bs->slice_start < now)
3014 && (bs->slice_end > now)) {
3015 bs->slice_end = now + bs->slice_time;
3016 } else {
3017 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3018 bs->slice_start = now;
3019 bs->slice_end = now + bs->slice_time;
3020
3021 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3022 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3023
3024 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3025 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3026 }
3027
3028 elapsed_time = now - bs->slice_start;
3029 elapsed_time /= (NANOSECONDS_PER_SECOND);
3030
3031 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3032 is_write, elapsed_time, &bps_wait);
3033 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3034 elapsed_time, &iops_wait);
3035 if (bps_ret || iops_ret) {
3036 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3037 if (wait) {
3038 *wait = max_wait;
3039 }
3040
3041 now = qemu_get_clock_ns(vm_clock);
3042 if (bs->slice_end < now + max_wait) {
3043 bs->slice_end = now + max_wait;
3044 }
3045
3046 return true;
3047 }
3048
3049 if (wait) {
3050 *wait = 0;
3051 }
3052
3053 return false;
3054}
pbrookce1a14d2006-08-07 02:38:06 +00003055
/**************************************************************/
/* async block device emulation */
3058
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003059typedef struct BlockDriverAIOCBSync {
3060 BlockDriverAIOCB common;
3061 QEMUBH *bh;
3062 int ret;
3063 /* vector translation state */
3064 QEMUIOVector *qiov;
3065 uint8_t *bounce;
3066 int is_write;
3067} BlockDriverAIOCBSync;
3068
3069static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3070{
Kevin Wolfb666d232010-05-05 11:44:39 +02003071 BlockDriverAIOCBSync *acb =
3072 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003073 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003074 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003075 qemu_aio_release(acb);
3076}
3077
3078static AIOPool bdrv_em_aio_pool = {
3079 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3080 .cancel = bdrv_aio_cancel_em,
3081};
3082
bellard83f64092006-08-01 16:21:11 +00003083static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003084{
pbrookce1a14d2006-08-07 02:38:06 +00003085 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003086
aliguorif141eaf2009-04-07 18:43:24 +00003087 if (!acb->is_write)
3088 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003089 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003090 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003091 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003092 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003093 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003094}
bellardbeac80c2006-06-26 20:08:57 +00003095
aliguorif141eaf2009-04-07 18:43:24 +00003096static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3097 int64_t sector_num,
3098 QEMUIOVector *qiov,
3099 int nb_sectors,
3100 BlockDriverCompletionFunc *cb,
3101 void *opaque,
3102 int is_write)
3103
bellardea2384d2004-08-01 21:59:26 +00003104{
pbrookce1a14d2006-08-07 02:38:06 +00003105 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003106
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003107 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003108 acb->is_write = is_write;
3109 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003110 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003111 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003112
3113 if (is_write) {
3114 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003115 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003116 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003117 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003118 }
3119
pbrookce1a14d2006-08-07 02:38:06 +00003120 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003121
pbrookce1a14d2006-08-07 02:38:06 +00003122 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003123}
3124
aliguorif141eaf2009-04-07 18:43:24 +00003125static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3126 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003127 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003128{
aliguorif141eaf2009-04-07 18:43:24 +00003129 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003130}
3131
aliguorif141eaf2009-04-07 18:43:24 +00003132static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3133 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3134 BlockDriverCompletionFunc *cb, void *opaque)
3135{
3136 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3137}
3138
Kevin Wolf68485422011-06-30 10:05:46 +02003139
3140typedef struct BlockDriverAIOCBCoroutine {
3141 BlockDriverAIOCB common;
3142 BlockRequest req;
3143 bool is_write;
3144 QEMUBH* bh;
3145} BlockDriverAIOCBCoroutine;
3146
3147static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3148{
3149 qemu_aio_flush();
3150}
3151
3152static AIOPool bdrv_em_co_aio_pool = {
3153 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3154 .cancel = bdrv_aio_co_cancel_em,
3155};
3156
Paolo Bonzini35246a62011-10-14 10:41:29 +02003157static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003158{
3159 BlockDriverAIOCBCoroutine *acb = opaque;
3160
3161 acb->common.cb(acb->common.opaque, acb->req.error);
3162 qemu_bh_delete(acb->bh);
3163 qemu_aio_release(acb);
3164}
3165
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003166/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3167static void coroutine_fn bdrv_co_do_rw(void *opaque)
3168{
3169 BlockDriverAIOCBCoroutine *acb = opaque;
3170 BlockDriverState *bs = acb->common.bs;
3171
3172 if (!acb->is_write) {
3173 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003174 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003175 } else {
3176 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3177 acb->req.nb_sectors, acb->req.qiov);
3178 }
3179
Paolo Bonzini35246a62011-10-14 10:41:29 +02003180 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003181 qemu_bh_schedule(acb->bh);
3182}
3183
Kevin Wolf68485422011-06-30 10:05:46 +02003184static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3185 int64_t sector_num,
3186 QEMUIOVector *qiov,
3187 int nb_sectors,
3188 BlockDriverCompletionFunc *cb,
3189 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003190 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003191{
3192 Coroutine *co;
3193 BlockDriverAIOCBCoroutine *acb;
3194
3195 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3196 acb->req.sector = sector_num;
3197 acb->req.nb_sectors = nb_sectors;
3198 acb->req.qiov = qiov;
3199 acb->is_write = is_write;
3200
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003201 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003202 qemu_coroutine_enter(co, acb);
3203
3204 return &acb->common;
3205}
3206
Paolo Bonzini07f07612011-10-17 12:32:12 +02003207static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003208{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003209 BlockDriverAIOCBCoroutine *acb = opaque;
3210 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003211
Paolo Bonzini07f07612011-10-17 12:32:12 +02003212 acb->req.error = bdrv_co_flush(bs);
3213 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003214 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003215}
3216
Paolo Bonzini07f07612011-10-17 12:32:12 +02003217BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003218 BlockDriverCompletionFunc *cb, void *opaque)
3219{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003220 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003221
Paolo Bonzini07f07612011-10-17 12:32:12 +02003222 Coroutine *co;
3223 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003224
Paolo Bonzini07f07612011-10-17 12:32:12 +02003225 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3226 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3227 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003228
Alexander Graf016f5cf2010-05-26 17:51:49 +02003229 return &acb->common;
3230}
3231
Paolo Bonzini4265d622011-10-17 12:32:14 +02003232static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3233{
3234 BlockDriverAIOCBCoroutine *acb = opaque;
3235 BlockDriverState *bs = acb->common.bs;
3236
3237 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3238 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3239 qemu_bh_schedule(acb->bh);
3240}
3241
3242BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3243 int64_t sector_num, int nb_sectors,
3244 BlockDriverCompletionFunc *cb, void *opaque)
3245{
3246 Coroutine *co;
3247 BlockDriverAIOCBCoroutine *acb;
3248
3249 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3250
3251 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3252 acb->req.sector = sector_num;
3253 acb->req.nb_sectors = nb_sectors;
3254 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3255 qemu_coroutine_enter(co, acb);
3256
3257 return &acb->common;
3258}
3259
bellardea2384d2004-08-01 21:59:26 +00003260void bdrv_init(void)
3261{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003262 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003263}
pbrookce1a14d2006-08-07 02:38:06 +00003264
Markus Armbrustereb852012009-10-27 18:41:44 +01003265void bdrv_init_with_whitelist(void)
3266{
3267 use_bdrv_whitelist = 1;
3268 bdrv_init();
3269}
3270
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003271void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3272 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003273{
pbrookce1a14d2006-08-07 02:38:06 +00003274 BlockDriverAIOCB *acb;
3275
aliguori6bbff9a2009-03-20 18:25:59 +00003276 if (pool->free_aiocb) {
3277 acb = pool->free_aiocb;
3278 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003279 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003280 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003281 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003282 }
3283 acb->bs = bs;
3284 acb->cb = cb;
3285 acb->opaque = opaque;
3286 return acb;
3287}
3288
3289void qemu_aio_release(void *p)
3290{
aliguori6bbff9a2009-03-20 18:25:59 +00003291 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3292 AIOPool *pool = acb->pool;
3293 acb->next = pool->free_aiocb;
3294 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003295}
bellard19cb3732006-08-19 11:45:59 +00003296
/**************************************************************/
/* Coroutine block device emulation */
3299
3300typedef struct CoroutineIOCompletion {
3301 Coroutine *coroutine;
3302 int ret;
3303} CoroutineIOCompletion;
3304
3305static void bdrv_co_io_em_complete(void *opaque, int ret)
3306{
3307 CoroutineIOCompletion *co = opaque;
3308
3309 co->ret = ret;
3310 qemu_coroutine_enter(co->coroutine, NULL);
3311}
3312
3313static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3314 int nb_sectors, QEMUIOVector *iov,
3315 bool is_write)
3316{
3317 CoroutineIOCompletion co = {
3318 .coroutine = qemu_coroutine_self(),
3319 };
3320 BlockDriverAIOCB *acb;
3321
3322 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003323 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3324 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003325 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003326 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3327 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003328 }
3329
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003330 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003331 if (!acb) {
3332 return -EIO;
3333 }
3334 qemu_coroutine_yield();
3335
3336 return co.ret;
3337}
3338
3339static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3340 int64_t sector_num, int nb_sectors,
3341 QEMUIOVector *iov)
3342{
3343 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3344}
3345
3346static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3347 int64_t sector_num, int nb_sectors,
3348 QEMUIOVector *iov)
3349{
3350 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3351}
3352
Paolo Bonzini07f07612011-10-17 12:32:12 +02003353static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003354{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003355 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003356
Paolo Bonzini07f07612011-10-17 12:32:12 +02003357 rwco->ret = bdrv_co_flush(rwco->bs);
3358}
3359
3360int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3361{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003362 int ret;
3363
Kevin Wolfca716362011-11-10 18:13:59 +01003364 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003365 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003366 }
3367
Kevin Wolfca716362011-11-10 18:13:59 +01003368 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003369 if (bs->drv->bdrv_co_flush_to_os) {
3370 ret = bs->drv->bdrv_co_flush_to_os(bs);
3371 if (ret < 0) {
3372 return ret;
3373 }
3374 }
3375
Kevin Wolfca716362011-11-10 18:13:59 +01003376 /* But don't actually force it to the disk with cache=unsafe */
3377 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3378 return 0;
3379 }
3380
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003381 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003382 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003383 } else if (bs->drv->bdrv_aio_flush) {
3384 BlockDriverAIOCB *acb;
3385 CoroutineIOCompletion co = {
3386 .coroutine = qemu_coroutine_self(),
3387 };
3388
3389 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3390 if (acb == NULL) {
3391 return -EIO;
3392 } else {
3393 qemu_coroutine_yield();
3394 return co.ret;
3395 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003396 } else {
3397 /*
3398 * Some block drivers always operate in either writethrough or unsafe
3399 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3400 * know how the server works (because the behaviour is hardcoded or
3401 * depends on server-side configuration), so we can't ensure that
3402 * everything is safe on disk. Returning an error doesn't work because
3403 * that would break guests even if the server operates in writethrough
3404 * mode.
3405 *
3406 * Let's hope the user knows what he's doing.
3407 */
3408 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003409 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003410}
3411
Anthony Liguori0f154232011-11-14 15:09:45 -06003412void bdrv_invalidate_cache(BlockDriverState *bs)
3413{
3414 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3415 bs->drv->bdrv_invalidate_cache(bs);
3416 }
3417}
3418
3419void bdrv_invalidate_cache_all(void)
3420{
3421 BlockDriverState *bs;
3422
3423 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3424 bdrv_invalidate_cache(bs);
3425 }
3426}
3427
Paolo Bonzini07f07612011-10-17 12:32:12 +02003428int bdrv_flush(BlockDriverState *bs)
3429{
3430 Coroutine *co;
3431 RwCo rwco = {
3432 .bs = bs,
3433 .ret = NOT_DONE,
3434 };
3435
3436 if (qemu_in_coroutine()) {
3437 /* Fast-path if already in coroutine context */
3438 bdrv_flush_co_entry(&rwco);
3439 } else {
3440 co = qemu_coroutine_create(bdrv_flush_co_entry);
3441 qemu_coroutine_enter(co, &rwco);
3442 while (rwco.ret == NOT_DONE) {
3443 qemu_aio_wait();
3444 }
3445 }
3446
3447 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003448}
3449
Paolo Bonzini4265d622011-10-17 12:32:14 +02003450static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3451{
3452 RwCo *rwco = opaque;
3453
3454 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3455}
3456
3457int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3458 int nb_sectors)
3459{
3460 if (!bs->drv) {
3461 return -ENOMEDIUM;
3462 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3463 return -EIO;
3464 } else if (bs->read_only) {
3465 return -EROFS;
3466 } else if (bs->drv->bdrv_co_discard) {
3467 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3468 } else if (bs->drv->bdrv_aio_discard) {
3469 BlockDriverAIOCB *acb;
3470 CoroutineIOCompletion co = {
3471 .coroutine = qemu_coroutine_self(),
3472 };
3473
3474 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3475 bdrv_co_io_em_complete, &co);
3476 if (acb == NULL) {
3477 return -EIO;
3478 } else {
3479 qemu_coroutine_yield();
3480 return co.ret;
3481 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003482 } else {
3483 return 0;
3484 }
3485}
3486
3487int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3488{
3489 Coroutine *co;
3490 RwCo rwco = {
3491 .bs = bs,
3492 .sector_num = sector_num,
3493 .nb_sectors = nb_sectors,
3494 .ret = NOT_DONE,
3495 };
3496
3497 if (qemu_in_coroutine()) {
3498 /* Fast-path if already in coroutine context */
3499 bdrv_discard_co_entry(&rwco);
3500 } else {
3501 co = qemu_coroutine_create(bdrv_discard_co_entry);
3502 qemu_coroutine_enter(co, &rwco);
3503 while (rwco.ret == NOT_DONE) {
3504 qemu_aio_wait();
3505 }
3506 }
3507
3508 return rwco.ret;
3509}
3510
/**************************************************************/
/* removable device support */
3513
3514/**
3515 * Return TRUE if the media is present
3516 */
3517int bdrv_is_inserted(BlockDriverState *bs)
3518{
3519 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003520
bellard19cb3732006-08-19 11:45:59 +00003521 if (!drv)
3522 return 0;
3523 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003524 return 1;
3525 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003526}
3527
3528/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003529 * Return whether the media changed since the last call to this
3530 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003531 */
3532int bdrv_media_changed(BlockDriverState *bs)
3533{
3534 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003535
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003536 if (drv && drv->bdrv_media_changed) {
3537 return drv->bdrv_media_changed(bs);
3538 }
3539 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003540}
3541
3542/**
3543 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3544 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003545void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003546{
3547 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003548
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003549 if (drv && drv->bdrv_eject) {
3550 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003551 }
bellard19cb3732006-08-19 11:45:59 +00003552}
3553
bellard19cb3732006-08-19 11:45:59 +00003554/**
3555 * Lock or unlock the media (if it is locked, the user won't be able
3556 * to eject it manually).
3557 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003558void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003559{
3560 BlockDriver *drv = bs->drv;
3561
Markus Armbruster025e8492011-09-06 18:58:47 +02003562 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003563
Markus Armbruster025e8492011-09-06 18:58:47 +02003564 if (drv && drv->bdrv_lock_medium) {
3565 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003566 }
3567}
ths985a03b2007-12-24 16:10:43 +00003568
/* Needed for the generic SCSI interface */
3570
3571int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3572{
3573 BlockDriver *drv = bs->drv;
3574
3575 if (drv && drv->bdrv_ioctl)
3576 return drv->bdrv_ioctl(bs, req, buf);
3577 return -ENOTSUP;
3578}
aliguori7d780662009-03-12 19:57:08 +00003579
aliguori221f7152009-03-28 17:28:41 +00003580BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3581 unsigned long int req, void *buf,
3582 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003583{
aliguori221f7152009-03-28 17:28:41 +00003584 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003585
aliguori221f7152009-03-28 17:28:41 +00003586 if (drv && drv->bdrv_aio_ioctl)
3587 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3588 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003589}
aliguorie268ca52009-04-22 20:20:00 +00003590
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003591void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3592{
3593 bs->buffer_alignment = align;
3594}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003595
aliguorie268ca52009-04-22 20:20:00 +00003596void *qemu_blockalign(BlockDriverState *bs, size_t size)
3597{
3598 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3599}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003600
3601void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3602{
3603 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003604
Liran Schouraaa0eb72010-01-26 10:31:48 +02003605 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003606 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003607 if (!bs->dirty_bitmap) {
3608 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3609 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3610 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003611
Anthony Liguori7267c092011-08-20 22:09:37 -05003612 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003613 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003614 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003615 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003616 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003617 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003618 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003619 }
3620}
3621
3622int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3623{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003624 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003625
Jan Kiszkac6d22832009-11-30 18:21:20 +01003626 if (bs->dirty_bitmap &&
3627 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003628 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3629 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003630 } else {
3631 return 0;
3632 }
3633}
3634
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003635void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3636 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003637{
3638 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3639}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003640
3641int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3642{
3643 return bs->dirty_count;
3644}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003645
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003646void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3647{
3648 assert(bs->in_use != in_use);
3649 bs->in_use = in_use;
3650}
3651
3652int bdrv_in_use(BlockDriverState *bs)
3653{
3654 return bs->in_use;
3655}
3656
Luiz Capitulino28a72822011-09-26 17:43:50 -03003657void bdrv_iostatus_enable(BlockDriverState *bs)
3658{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003659 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003660 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003661}
3662
3663/* The I/O status is only enabled if the drive explicitly
3664 * enables it _and_ the VM is configured to stop on errors */
3665bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3666{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003667 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003668 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3669 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3670 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3671}
3672
3673void bdrv_iostatus_disable(BlockDriverState *bs)
3674{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003675 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003676}
3677
3678void bdrv_iostatus_reset(BlockDriverState *bs)
3679{
3680 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003681 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003682 }
3683}
3684
3685/* XXX: Today this is set by device models because it makes the implementation
3686 quite simple. However, the block layer knows about the error, so it's
3687 possible to implement this without device models being involved */
3688void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3689{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003690 if (bdrv_iostatus_is_enabled(bs) &&
3691 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003692 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003693 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3694 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003695 }
3696}
3697
Christoph Hellwiga597e792011-08-25 08:26:01 +02003698void
3699bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3700 enum BlockAcctType type)
3701{
3702 assert(type < BDRV_MAX_IOTYPE);
3703
3704 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003705 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003706 cookie->type = type;
3707}
3708
3709void
3710bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3711{
3712 assert(cookie->type < BDRV_MAX_IOTYPE);
3713
3714 bs->nr_bytes[cookie->type] += cookie->bytes;
3715 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003716 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003717}
3718
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003719int bdrv_img_create(const char *filename, const char *fmt,
3720 const char *base_filename, const char *base_fmt,
3721 char *options, uint64_t img_size, int flags)
3722{
3723 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003724 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003725 BlockDriverState *bs = NULL;
3726 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003727 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003728 int ret = 0;
3729
3730 /* Find driver and parse its options */
3731 drv = bdrv_find_format(fmt);
3732 if (!drv) {
3733 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003734 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003735 goto out;
3736 }
3737
3738 proto_drv = bdrv_find_protocol(filename);
3739 if (!proto_drv) {
3740 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003741 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003742 goto out;
3743 }
3744
3745 create_options = append_option_parameters(create_options,
3746 drv->create_options);
3747 create_options = append_option_parameters(create_options,
3748 proto_drv->create_options);
3749
3750 /* Create parameter list with default values */
3751 param = parse_option_parameters("", create_options, param);
3752
3753 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3754
3755 /* Parse -o options */
3756 if (options) {
3757 param = parse_option_parameters(options, create_options, param);
3758 if (param == NULL) {
3759 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003760 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003761 goto out;
3762 }
3763 }
3764
3765 if (base_filename) {
3766 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3767 base_filename)) {
3768 error_report("Backing file not supported for file format '%s'",
3769 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003770 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003771 goto out;
3772 }
3773 }
3774
3775 if (base_fmt) {
3776 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3777 error_report("Backing file format not supported for file "
3778 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003779 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003780 goto out;
3781 }
3782 }
3783
Jes Sorensen792da932010-12-16 13:52:17 +01003784 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3785 if (backing_file && backing_file->value.s) {
3786 if (!strcmp(filename, backing_file->value.s)) {
3787 error_report("Error: Trying to create an image with the "
3788 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003789 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003790 goto out;
3791 }
3792 }
3793
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003794 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3795 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003796 backing_drv = bdrv_find_format(backing_fmt->value.s);
3797 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003798 error_report("Unknown backing file format '%s'",
3799 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003800 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003801 goto out;
3802 }
3803 }
3804
3805 // The size for the image must always be specified, with one exception:
3806 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003807 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3808 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003809 if (backing_file && backing_file->value.s) {
3810 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003811 char buf[32];
3812
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003813 bs = bdrv_new("");
3814
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003815 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003816 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003817 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003818 goto out;
3819 }
3820 bdrv_get_geometry(bs, &size);
3821 size *= 512;
3822
3823 snprintf(buf, sizeof(buf), "%" PRId64, size);
3824 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3825 } else {
3826 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003827 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003828 goto out;
3829 }
3830 }
3831
3832 printf("Formatting '%s', fmt=%s ", filename, fmt);
3833 print_option_parameters(param);
3834 puts("");
3835
3836 ret = bdrv_create(drv, filename, param);
3837
3838 if (ret < 0) {
3839 if (ret == -ENOTSUP) {
3840 error_report("Formatting or formatting option not supported for "
3841 "file format '%s'", fmt);
3842 } else if (ret == -EFBIG) {
3843 error_report("The image size is too large for file format '%s'",
3844 fmt);
3845 } else {
3846 error_report("%s: error while creating %s: %s", filename, fmt,
3847 strerror(-ret));
3848 }
3849 }
3850
3851out:
3852 free_option_parameters(create_options);
3853 free_option_parameters(param);
3854
3855 if (bs) {
3856 bdrv_delete(bs);
3857 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003858
3859 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003860}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00003861
3862void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
3863 BlockDriverCompletionFunc *cb, void *opaque)
3864{
3865 BlockJob *job;
3866
3867 if (bs->job || bdrv_in_use(bs)) {
3868 return NULL;
3869 }
3870 bdrv_set_in_use(bs, 1);
3871
3872 job = g_malloc0(job_type->instance_size);
3873 job->job_type = job_type;
3874 job->bs = bs;
3875 job->cb = cb;
3876 job->opaque = opaque;
3877 bs->job = job;
3878 return job;
3879}
3880
3881void block_job_complete(BlockJob *job, int ret)
3882{
3883 BlockDriverState *bs = job->bs;
3884
3885 assert(bs->job == job);
3886 job->cb(job->opaque, ret);
3887 bs->job = NULL;
3888 g_free(job);
3889 bdrv_set_in_use(bs, 0);
3890}
3891
3892int block_job_set_speed(BlockJob *job, int64_t value)
3893{
3894 if (!job->job_type->set_speed) {
3895 return -ENOTSUP;
3896 }
3897 return job->job_type->set_speed(job, value);
3898}
3899
3900void block_job_cancel(BlockJob *job)
3901{
3902 job->cancelled = true;
3903}
3904
3905bool block_job_is_cancelled(BlockJob *job)
3906{
3907 return job->cancelled;
3908}