blob: 8858be096bf68c5fc46dfe839bcf021b6985f377 [file] [log] [blame]
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/* Value used while an emulated synchronous operation is still in progress */
#define NOT_DONE 0x7fffffff

/* Per-request flags; each enumerator occupies its own bit. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080084static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010094static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000096
Markus Armbrusterf9092b12010-06-25 10:33:39 +020097/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/* Return 1 if filename starts with an ASCII letter followed by ':', else 0. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first character is a letter */
    return is_letter && filename[1] == ':';
}
110
111int is_windows_drive(const char *filename)
112{
113 if (is_windows_drive_prefix(filename) &&
114 filename[2] == '\0')
115 return 1;
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
118 return 1;
119 return 0;
120}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800124void bdrv_io_limits_disable(BlockDriverState *bs)
125{
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140}
141
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800142static void bdrv_block_timer(void *opaque)
143{
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147}
148
149void bdrv_io_limits_enable(BlockDriverState *bs)
150{
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158}
159
160bool bdrv_io_limits_enabled(BlockDriverState *bs)
161{
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169}
170
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800171static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173{
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194}
195
/*
 * Check whether the path starts with "<protocol>:".
 *
 * Returns 1 if @path contains a ':' and 0 otherwise. On Windows, drive
 * specifications recognised by is_windows_drive() or
 * is_windows_drive_prefix() are never treated as protocols.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
208
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * The character examined is the one following the first ':' in @path, or
 * the first character when there is no ':'. It must be '/' (or, on
 * Windows, '\\'). On Windows a path that itself begins with '/' or '\\'
 * is absolute regardless of any ':' (names like "\\.\d:").
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
228
/*
 * Build in @dest (at most @dest_size bytes) the path of @filename.
 *
 * If @filename is absolute it is copied unchanged. Otherwise it is taken
 * as relative to @base_path: the leading part of @base_path, up to and
 * including its last directory separator or its first ':' (whichever
 * ends later), is kept and @filename is appended to it. URLs are
 * supported. Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (or start of base_path) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last directory separator (or start) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes */
    if (after_sep > after_colon) {
        after_colon = after_sep;
    }

    prefix_len = after_colon - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200342int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000344{
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200347
348 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000349}
350
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352{
353 BlockDriver *drv;
354
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900355 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000357 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200358 }
359
360 return bdrv_create(drv, filename, options);
361}
362
#ifdef _WIN32
/*
 * Store the name of a new temporary file in @filename.
 *
 * GetTempFileName() is not given the size of its output buffer and
 * requires it to hold at least MAX_PATH characters, so @size is checked
 * here rather than risking a write past the end of a shorter buffer.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    assert(size >= MAX_PATH);

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a new temporary file in @filename (@size bytes).
 *
 * The name is built from the template "$TMPDIR/vl.XXXXXX", falling back
 * to "/tmp" when TMPDIR is not set, and handed to mkstemp(). The
 * descriptor mkstemp() returns is closed again; only the name is kept.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; there is nothing to close then */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000385
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200386/*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200390static BlockDriver *find_hdev_driver(const char *filename)
391{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200394
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100395 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200403 }
404
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200405 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200406}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200407
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900408BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200409{
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
Christoph Hellwig39508e72010-06-23 12:25:17 +0200417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200426 return drv1;
427 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000429 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200430 return bdrv_find_format("file");
431 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000432 p = strchr(filename, ':');
433 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446}
447
Stefan Weilc98ac352010-07-21 21:51:51 +0200448static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000449{
bellard83f64092006-08-01 16:21:11 +0000450 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000451 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000452 uint8_t buf[2048];
453 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000454
Naphtali Spreif5edb012010-01-17 16:48:13 +0200455 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700460
Kevin Wolf08a00552010-06-01 18:37:31 +0200461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700470 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700471
bellard83f64092006-08-01 16:21:11 +0000472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200475 *pdrv = NULL;
476 return ret;
bellard83f64092006-08-01 16:21:11 +0000477 }
478
bellardea2384d2004-08-01 21:59:26 +0000479 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
bellardea2384d2004-08-01 21:59:26 +0000488 }
489 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
bellardea2384d2004-08-01 21:59:26 +0000495}
496
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100497/**
498 * Set the current 'total_sectors' value
499 */
500static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501{
502 BlockDriver *drv = bs->drv;
503
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519}
520
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100521/**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526int bdrv_parse_cache_flags(const char *mode, int *flags)
527{
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546}
547
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000548/**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553void bdrv_enable_copy_on_read(BlockDriverState *bs)
554{
555 bs->copy_on_read++;
556}
557
558void bdrv_disable_copy_on_read(BlockDriverState *bs)
559{
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562}
563
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200564/*
Kevin Wolf57915332010-04-14 15:24:50 +0200565 * Common part for opening disk images and files
566 */
567static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569{
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200576 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100577 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200578 bs->encrypted = 0;
579 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100582 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200583 bs->buffer_alignment = 512;
584
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
Kevin Wolf57915332010-04-14 15:24:50 +0200590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100591 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500598 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200609 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
Kevin Wolf57915332010-04-14 15:24:50 +0200627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200634 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100635
Kevin Wolf57915332010-04-14 15:24:50 +0200636#ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640#endif
641 return 0;
642
643free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500648 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652}
653
654/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
bellard83f64092006-08-01 16:21:11 +0000657int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000658{
bellard83f64092006-08-01 16:21:11 +0000659 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200660 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000661 int ret;
662
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900663 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200664 if (!drv) {
665 return -ENOENT;
666 }
667
bellard83f64092006-08-01 16:21:11 +0000668 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200669 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000673 }
aliguori71d07702009-03-03 17:37:16 +0000674 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000675 *pbs = bs;
676 return 0;
bellardea2384d2004-08-01 21:59:26 +0000677}
bellardfc01f7e2003-06-30 10:03:06 +0000678
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200679/*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200682int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000684{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200685 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200686 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000687
bellard83f64092006-08-01 16:21:11 +0000688 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000689 BlockDriverState *bs1;
690 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000691 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200694 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000695
bellardea2384d2004-08-01 21:59:26 +0000696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200701 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000702 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000703 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000704 return ret;
bellardea2384d2004-08-01 21:59:26 +0000705 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
bellardea2384d2004-08-01 21:59:26 +0000711 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000712
bellardea2384d2004-08-01 21:59:26 +0000713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000719 else if (!realpath(filename, backing_filename))
720 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000721
Kevin Wolf91a073a2009-05-27 14:48:06 +0200722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
Jes Sorensen3e829902010-05-27 16:20:30 +0200725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200733 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000734 if (ret < 0) {
735 return ret;
bellardea2384d2004-08-01 21:59:26 +0000736 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737
bellardea2384d2004-08-01 21:59:26 +0000738 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200739 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000740 bs->is_temporary = 1;
741 }
bellard712e7872005-04-28 21:09:32 +0000742
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200743 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200744 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200745 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000746 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100747
aliguori51d7c002009-03-05 23:00:29 +0000748 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000749 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000750 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100755 goto unlink_and_fail;
756 }
757
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000776 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200796 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 }
798
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200804 return 0;
805
806unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811}
812
bellardfc01f7e2003-06-30 10:03:06 +0000813void bdrv_close(BlockDriverState *bs)
814{
bellard19cb3732006-08-19 11:45:59 +0000815 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200816 if (bs == bs_snapshots) {
817 bs_snapshots = NULL;
818 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100819 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000820 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100821 bs->backing_hd = NULL;
822 }
bellardea2384d2004-08-01 21:59:26 +0000823 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500824 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000825#ifdef _WIN32
826 if (bs->is_temporary) {
827 unlink(bs->filename);
828 }
bellard67b915a2004-03-31 23:37:16 +0000829#endif
bellardea2384d2004-08-01 21:59:26 +0000830 bs->opaque = NULL;
831 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000832 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000833
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200834 if (bs->file != NULL) {
835 bdrv_close(bs->file);
836 }
837
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200838 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000839 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800840
841 /*throttling disk I/O limits*/
842 if (bs->io_limits_enabled) {
843 bdrv_io_limits_disable(bs);
844 }
bellardb3380822004-03-14 21:38:54 +0000845}
846
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900847void bdrv_close_all(void)
848{
849 BlockDriverState *bs;
850
851 QTAILQ_FOREACH(bs, &bdrv_states, list) {
852 bdrv_close(bs);
853 }
854}
855
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000856/*
857 * Wait for pending requests to complete across all BlockDriverStates
858 *
859 * This function does not flush data to disk, use bdrv_flush_all() for that
860 * after calling this function.
861 */
862void bdrv_drain_all(void)
863{
864 BlockDriverState *bs;
865
866 qemu_aio_flush();
867
868 /* If requests are still pending there is a bug somewhere */
869 QTAILQ_FOREACH(bs, &bdrv_states, list) {
870 assert(QLIST_EMPTY(&bs->tracked_requests));
871 assert(qemu_co_queue_empty(&bs->throttled_reqs));
872 }
873}
874
Ryan Harperd22b2f42011-03-29 20:51:47 -0500875/* make a BlockDriverState anonymous by removing from bdrv_state list.
876 Also, NULL terminate the device_name to prevent double remove */
877void bdrv_make_anon(BlockDriverState *bs)
878{
879 if (bs->device_name[0] != '\0') {
880 QTAILQ_REMOVE(&bdrv_states, bs, list);
881 }
882 bs->device_name[0] = '\0';
883}
884
Jeff Cody8802d1f2012-02-28 15:54:06 -0500885/*
886 * Add new bs contents at the top of an image chain while the chain is
887 * live, while keeping required fields on the top layer.
888 *
889 * This will modify the BlockDriverState fields, and swap contents
890 * between bs_new and bs_top. Both bs_new and bs_top are modified.
891 *
892 * This function does not create any image files.
893 */
894void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
895{
896 BlockDriverState tmp;
897
898 /* the new bs must not be in bdrv_states */
899 bdrv_make_anon(bs_new);
900
901 tmp = *bs_new;
902
903 /* there are some fields that need to stay on the top layer: */
904
905 /* dev info */
906 tmp.dev_ops = bs_top->dev_ops;
907 tmp.dev_opaque = bs_top->dev_opaque;
908 tmp.dev = bs_top->dev;
909 tmp.buffer_alignment = bs_top->buffer_alignment;
910 tmp.copy_on_read = bs_top->copy_on_read;
911
912 /* i/o timing parameters */
913 tmp.slice_time = bs_top->slice_time;
914 tmp.slice_start = bs_top->slice_start;
915 tmp.slice_end = bs_top->slice_end;
916 tmp.io_limits = bs_top->io_limits;
917 tmp.io_base = bs_top->io_base;
918 tmp.throttled_reqs = bs_top->throttled_reqs;
919 tmp.block_timer = bs_top->block_timer;
920 tmp.io_limits_enabled = bs_top->io_limits_enabled;
921
922 /* geometry */
923 tmp.cyls = bs_top->cyls;
924 tmp.heads = bs_top->heads;
925 tmp.secs = bs_top->secs;
926 tmp.translation = bs_top->translation;
927
928 /* r/w error */
929 tmp.on_read_error = bs_top->on_read_error;
930 tmp.on_write_error = bs_top->on_write_error;
931
932 /* i/o status */
933 tmp.iostatus_enabled = bs_top->iostatus_enabled;
934 tmp.iostatus = bs_top->iostatus;
935
936 /* keep the same entry in bdrv_states */
937 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
938 tmp.list = bs_top->list;
939
940 /* The contents of 'tmp' will become bs_top, as we are
941 * swapping bs_new and bs_top contents. */
942 tmp.backing_hd = bs_new;
943 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
944
945 /* swap contents of the fixed new bs and the current top */
946 *bs_new = *bs_top;
947 *bs_top = tmp;
948
949 /* clear the copied fields in the new backing file */
950 bdrv_detach_dev(bs_new, bs_new->dev);
951
952 qemu_co_queue_init(&bs_new->throttled_reqs);
953 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
954 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
955 bdrv_iostatus_disable(bs_new);
956
957 /* we don't use bdrv_io_limits_disable() for this, because we don't want
958 * to affect or delete the block_timer, as it has been moved to bs_top */
959 bs_new->io_limits_enabled = false;
960 bs_new->block_timer = NULL;
961 bs_new->slice_time = 0;
962 bs_new->slice_start = 0;
963 bs_new->slice_end = 0;
964}
965
bellardb3380822004-03-14 21:38:54 +0000966void bdrv_delete(BlockDriverState *bs)
967{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200968 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200969
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100970 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500971 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000972
bellardb3380822004-03-14 21:38:54 +0000973 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200974 if (bs->file != NULL) {
975 bdrv_delete(bs->file);
976 }
977
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200978 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500979 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000980}
981
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200982int bdrv_attach_dev(BlockDriverState *bs, void *dev)
983/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200984{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200985 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200986 return -EBUSY;
987 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200988 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300989 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200990 return 0;
991}
992
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200993/* TODO qdevified devices don't use this, remove when devices are qdevified */
994void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200995{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200996 if (bdrv_attach_dev(bs, dev) < 0) {
997 abort();
998 }
999}
1000
1001void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1002/* TODO change to DeviceState *dev when all users are qdevified */
1003{
1004 assert(bs->dev == dev);
1005 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001006 bs->dev_ops = NULL;
1007 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001008 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001009}
1010
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001011/* TODO change to return DeviceState * when all users are qdevified */
1012void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001013{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001014 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001015}
1016
Markus Armbruster0e49de52011-08-03 15:07:41 +02001017void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1018 void *opaque)
1019{
1020 bs->dev_ops = ops;
1021 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001022 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1023 bs_snapshots = NULL;
1024 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001025}
1026
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001027void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1028 BlockQMPEventAction action, int is_read)
1029{
1030 QObject *data;
1031 const char *action_str;
1032
1033 switch (action) {
1034 case BDRV_ACTION_REPORT:
1035 action_str = "report";
1036 break;
1037 case BDRV_ACTION_IGNORE:
1038 action_str = "ignore";
1039 break;
1040 case BDRV_ACTION_STOP:
1041 action_str = "stop";
1042 break;
1043 default:
1044 abort();
1045 }
1046
1047 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1048 bdrv->device_name,
1049 action_str,
1050 is_read ? "read" : "write");
1051 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1052
1053 qobject_decref(data);
1054}
1055
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001056static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1057{
1058 QObject *data;
1059
1060 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1061 bdrv_get_device_name(bs), ejected);
1062 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1063
1064 qobject_decref(data);
1065}
1066
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001067static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001068{
Markus Armbruster145feb12011-08-03 15:07:42 +02001069 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001070 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001071 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001072 if (tray_was_closed) {
1073 /* tray open */
1074 bdrv_emit_qmp_eject_event(bs, true);
1075 }
1076 if (load) {
1077 /* tray close */
1078 bdrv_emit_qmp_eject_event(bs, false);
1079 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001080 }
1081}
1082
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001083bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1084{
1085 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1086}
1087
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001088void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1089{
1090 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1091 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1092 }
1093}
1094
Markus Armbrustere4def802011-09-06 18:58:53 +02001095bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1096{
1097 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1098 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1099 }
1100 return false;
1101}
1102
Markus Armbruster145feb12011-08-03 15:07:42 +02001103static void bdrv_dev_resize_cb(BlockDriverState *bs)
1104{
1105 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1106 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001107 }
1108}
1109
Markus Armbrusterf1076392011-09-06 18:58:46 +02001110bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1111{
1112 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1113 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1114 }
1115 return false;
1116}
1117
aliguorie97fc192009-04-21 23:11:50 +00001118/*
1119 * Run consistency checks on an image
1120 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001121 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001122 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001123 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001124 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001125int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001126{
1127 if (bs->drv->bdrv_check == NULL) {
1128 return -ENOTSUP;
1129 }
1130
Kevin Wolfe076f332010-06-29 11:43:13 +02001131 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001132 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001133}
1134
Kevin Wolf8a426612010-07-16 17:17:01 +02001135#define COMMIT_BUF_SECTORS 2048
1136
bellard33e39632003-07-06 17:15:21 +00001137/* commit COW file into the raw image */
1138int bdrv_commit(BlockDriverState *bs)
1139{
bellard19cb3732006-08-19 11:45:59 +00001140 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001141 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001142 int64_t sector, total_sectors;
1143 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001144 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001145 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001146 char filename[1024];
1147 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001148
bellard19cb3732006-08-19 11:45:59 +00001149 if (!drv)
1150 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001151
1152 if (!bs->backing_hd) {
1153 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001154 }
1155
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001156 if (bs->backing_hd->keep_read_only) {
1157 return -EACCES;
1158 }
Kevin Wolfee181192010-08-05 13:05:22 +02001159
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001160 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1161 return -EBUSY;
1162 }
1163
Kevin Wolfee181192010-08-05 13:05:22 +02001164 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001165 ro = bs->backing_hd->read_only;
1166 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1167 open_flags = bs->backing_hd->open_flags;
1168
1169 if (ro) {
1170 /* re-open as RW */
1171 bdrv_delete(bs->backing_hd);
1172 bs->backing_hd = NULL;
1173 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001174 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1175 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001176 if (rw_ret < 0) {
1177 bdrv_delete(bs_rw);
1178 /* try to re-open read-only */
1179 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001180 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1181 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001182 if (ret < 0) {
1183 bdrv_delete(bs_ro);
1184 /* drive not functional anymore */
1185 bs->drv = NULL;
1186 return ret;
1187 }
1188 bs->backing_hd = bs_ro;
1189 return rw_ret;
1190 }
1191 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001192 }
bellardea2384d2004-08-01 21:59:26 +00001193
Jan Kiszka6ea44302009-11-30 18:21:19 +01001194 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001195 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001196
Kevin Wolf8a426612010-07-16 17:17:01 +02001197 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001198 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001199
1200 if (bdrv_read(bs, sector, buf, n) != 0) {
1201 ret = -EIO;
1202 goto ro_cleanup;
1203 }
1204
1205 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1206 ret = -EIO;
1207 goto ro_cleanup;
1208 }
bellardea2384d2004-08-01 21:59:26 +00001209 }
1210 }
bellard95389c82005-12-18 18:28:15 +00001211
Christoph Hellwig1d449522010-01-17 12:32:30 +01001212 if (drv->bdrv_make_empty) {
1213 ret = drv->bdrv_make_empty(bs);
1214 bdrv_flush(bs);
1215 }
bellard95389c82005-12-18 18:28:15 +00001216
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001217 /*
1218 * Make sure all data we wrote to the backing device is actually
1219 * stable on disk.
1220 */
1221 if (bs->backing_hd)
1222 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001223
1224ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001225 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001226
1227 if (ro) {
1228 /* re-open as RO */
1229 bdrv_delete(bs->backing_hd);
1230 bs->backing_hd = NULL;
1231 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001232 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1233 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001234 if (ret < 0) {
1235 bdrv_delete(bs_ro);
1236 /* drive not functional anymore */
1237 bs->drv = NULL;
1238 return ret;
1239 }
1240 bs->backing_hd = bs_ro;
1241 bs->backing_hd->keep_read_only = 0;
1242 }
1243
Christoph Hellwig1d449522010-01-17 12:32:30 +01001244 return ret;
bellard33e39632003-07-06 17:15:21 +00001245}
1246
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001247int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001248{
1249 BlockDriverState *bs;
1250
1251 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001252 int ret = bdrv_commit(bs);
1253 if (ret < 0) {
1254 return ret;
1255 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001256 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001257 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001258}
1259
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001260struct BdrvTrackedRequest {
1261 BlockDriverState *bs;
1262 int64_t sector_num;
1263 int nb_sectors;
1264 bool is_write;
1265 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001266 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001267 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001268};
1269
1270/**
1271 * Remove an active request from the tracked requests list
1272 *
1273 * This function should be called when a tracked request is completing.
1274 */
1275static void tracked_request_end(BdrvTrackedRequest *req)
1276{
1277 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001278 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001279}
1280
1281/**
1282 * Add an active request to the tracked requests list
1283 */
1284static void tracked_request_begin(BdrvTrackedRequest *req,
1285 BlockDriverState *bs,
1286 int64_t sector_num,
1287 int nb_sectors, bool is_write)
1288{
1289 *req = (BdrvTrackedRequest){
1290 .bs = bs,
1291 .sector_num = sector_num,
1292 .nb_sectors = nb_sectors,
1293 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001294 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001295 };
1296
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001297 qemu_co_queue_init(&req->wait_queue);
1298
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001299 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1300}
1301
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001302/**
1303 * Round a region to cluster boundaries
1304 */
1305static void round_to_clusters(BlockDriverState *bs,
1306 int64_t sector_num, int nb_sectors,
1307 int64_t *cluster_sector_num,
1308 int *cluster_nb_sectors)
1309{
1310 BlockDriverInfo bdi;
1311
1312 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1313 *cluster_sector_num = sector_num;
1314 *cluster_nb_sectors = nb_sectors;
1315 } else {
1316 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1317 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1318 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1319 nb_sectors, c);
1320 }
1321}
1322
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001323static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1324 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001325 /* aaaa bbbb */
1326 if (sector_num >= req->sector_num + req->nb_sectors) {
1327 return false;
1328 }
1329 /* bbbb aaaa */
1330 if (req->sector_num >= sector_num + nb_sectors) {
1331 return false;
1332 }
1333 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001334}
1335
1336static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1337 int64_t sector_num, int nb_sectors)
1338{
1339 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001340 int64_t cluster_sector_num;
1341 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001342 bool retry;
1343
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001344 /* If we touch the same cluster it counts as an overlap. This guarantees
1345 * that allocating writes will be serialized and not race with each other
1346 * for the same cluster. For example, in copy-on-read it ensures that the
1347 * CoR read and write operations are atomic and guest writes cannot
1348 * interleave between them.
1349 */
1350 round_to_clusters(bs, sector_num, nb_sectors,
1351 &cluster_sector_num, &cluster_nb_sectors);
1352
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001353 do {
1354 retry = false;
1355 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001356 if (tracked_request_overlaps(req, cluster_sector_num,
1357 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001358 /* Hitting this means there was a reentrant request, for
1359 * example, a block driver issuing nested requests. This must
1360 * never happen since it means deadlock.
1361 */
1362 assert(qemu_coroutine_self() != req->co);
1363
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001364 qemu_co_queue_wait(&req->wait_queue);
1365 retry = true;
1366 break;
1367 }
1368 }
1369 } while (retry);
1370}
1371
Kevin Wolf756e6732010-01-12 12:55:17 +01001372/*
1373 * Return values:
1374 * 0 - success
1375 * -EINVAL - backing format specified, but no file
1376 * -ENOSPC - can't update the backing file because no space is left in the
1377 * image file header
1378 * -ENOTSUP - format driver doesn't support changing the backing file
1379 */
1380int bdrv_change_backing_file(BlockDriverState *bs,
1381 const char *backing_file, const char *backing_fmt)
1382{
1383 BlockDriver *drv = bs->drv;
1384
1385 if (drv->bdrv_change_backing_file != NULL) {
1386 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1387 } else {
1388 return -ENOTSUP;
1389 }
1390}
1391
aliguori71d07702009-03-03 17:37:16 +00001392static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1393 size_t size)
1394{
1395 int64_t len;
1396
1397 if (!bdrv_is_inserted(bs))
1398 return -ENOMEDIUM;
1399
1400 if (bs->growable)
1401 return 0;
1402
1403 len = bdrv_getlength(bs);
1404
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001405 if (offset < 0)
1406 return -EIO;
1407
1408 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001409 return -EIO;
1410
1411 return 0;
1412}
1413
1414static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1415 int nb_sectors)
1416{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001417 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1418 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001419}
1420
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001421typedef struct RwCo {
1422 BlockDriverState *bs;
1423 int64_t sector_num;
1424 int nb_sectors;
1425 QEMUIOVector *qiov;
1426 bool is_write;
1427 int ret;
1428} RwCo;
1429
1430static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1431{
1432 RwCo *rwco = opaque;
1433
1434 if (!rwco->is_write) {
1435 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001436 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001437 } else {
1438 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001439 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001440 }
1441}
1442
1443/*
1444 * Process a synchronous request using coroutines
1445 */
1446static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1447 int nb_sectors, bool is_write)
1448{
1449 QEMUIOVector qiov;
1450 struct iovec iov = {
1451 .iov_base = (void *)buf,
1452 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1453 };
1454 Coroutine *co;
1455 RwCo rwco = {
1456 .bs = bs,
1457 .sector_num = sector_num,
1458 .nb_sectors = nb_sectors,
1459 .qiov = &qiov,
1460 .is_write = is_write,
1461 .ret = NOT_DONE,
1462 };
1463
1464 qemu_iovec_init_external(&qiov, &iov, 1);
1465
1466 if (qemu_in_coroutine()) {
1467 /* Fast-path if already in coroutine context */
1468 bdrv_rw_co_entry(&rwco);
1469 } else {
1470 co = qemu_coroutine_create(bdrv_rw_co_entry);
1471 qemu_coroutine_enter(co, &rwco);
1472 while (rwco.ret == NOT_DONE) {
1473 qemu_aio_wait();
1474 }
1475 }
1476 return rwco.ret;
1477}
1478
bellard19cb3732006-08-19 11:45:59 +00001479/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001480int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001481 uint8_t *buf, int nb_sectors)
1482{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001483 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001484}
1485
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001486static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001487 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001488{
1489 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001490 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001491
Jan Kiszka6ea44302009-11-30 18:21:19 +01001492 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001493 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001494
1495 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001496 idx = start / (sizeof(unsigned long) * 8);
1497 bit = start % (sizeof(unsigned long) * 8);
1498 val = bs->dirty_bitmap[idx];
1499 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001500 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001501 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001502 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001503 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001504 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001505 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001506 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001507 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001508 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001509 }
1510 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001511 }
1512}
1513
ths5fafdf22007-09-16 21:08:06 +00001514/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001515 -EIO generic I/O error (may happen for all errors)
1516 -ENOMEDIUM No media inserted.
1517 -EINVAL Invalid sector number or nb_sectors
1518 -EACCES Trying to write a read-only device
1519*/
ths5fafdf22007-09-16 21:08:06 +00001520int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001521 const uint8_t *buf, int nb_sectors)
1522{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001523 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001524}
1525
aliguorieda578e2009-03-12 19:57:16 +00001526int bdrv_pread(BlockDriverState *bs, int64_t offset,
1527 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001528{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001529 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001530 int len, nb_sectors, count;
1531 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001532 int ret;
bellard83f64092006-08-01 16:21:11 +00001533
1534 count = count1;
1535 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001536 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001537 if (len > count)
1538 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001539 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001540 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001541 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1542 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001543 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001544 count -= len;
1545 if (count == 0)
1546 return count1;
1547 sector_num++;
1548 buf += len;
1549 }
1550
1551 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001552 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001553 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001554 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1555 return ret;
bellard83f64092006-08-01 16:21:11 +00001556 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001557 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001558 buf += len;
1559 count -= len;
1560 }
1561
1562 /* add data from the last sector */
1563 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001564 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1565 return ret;
bellard83f64092006-08-01 16:21:11 +00001566 memcpy(buf, tmp_buf, count);
1567 }
1568 return count1;
1569}
1570
aliguorieda578e2009-03-12 19:57:16 +00001571int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1572 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001573{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001574 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001575 int len, nb_sectors, count;
1576 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001577 int ret;
bellard83f64092006-08-01 16:21:11 +00001578
1579 count = count1;
1580 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001581 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001582 if (len > count)
1583 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001584 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001585 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001586 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1587 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001588 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001589 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1590 return ret;
bellard83f64092006-08-01 16:21:11 +00001591 count -= len;
1592 if (count == 0)
1593 return count1;
1594 sector_num++;
1595 buf += len;
1596 }
1597
1598 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001599 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001600 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001601 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1602 return ret;
bellard83f64092006-08-01 16:21:11 +00001603 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001604 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001605 buf += len;
1606 count -= len;
1607 }
1608
1609 /* add data from the last sector */
1610 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001611 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1612 return ret;
bellard83f64092006-08-01 16:21:11 +00001613 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001614 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1615 return ret;
bellard83f64092006-08-01 16:21:11 +00001616 }
1617 return count1;
1618}
bellard83f64092006-08-01 16:21:11 +00001619
Kevin Wolff08145f2010-06-16 16:38:15 +02001620/*
1621 * Writes to the file and ensures that no writes are reordered across this
1622 * request (acts as a barrier)
1623 *
1624 * Returns 0 on success, -errno in error cases.
1625 */
1626int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1627 const void *buf, int count)
1628{
1629 int ret;
1630
1631 ret = bdrv_pwrite(bs, offset, buf, count);
1632 if (ret < 0) {
1633 return ret;
1634 }
1635
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001636 /* No flush needed for cache modes that use O_DSYNC */
1637 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001638 bdrv_flush(bs);
1639 }
1640
1641 return 0;
1642}
1643
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001644static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001645 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1646{
1647 /* Perform I/O through a temporary buffer so that users who scribble over
1648 * their read buffer while the operation is in progress do not end up
1649 * modifying the image file. This is critical for zero-copy guest I/O
1650 * where anything might happen inside guest memory.
1651 */
1652 void *bounce_buffer;
1653
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001654 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001655 struct iovec iov;
1656 QEMUIOVector bounce_qiov;
1657 int64_t cluster_sector_num;
1658 int cluster_nb_sectors;
1659 size_t skip_bytes;
1660 int ret;
1661
1662 /* Cover entire cluster so no additional backing file I/O is required when
1663 * allocating cluster in the image file.
1664 */
1665 round_to_clusters(bs, sector_num, nb_sectors,
1666 &cluster_sector_num, &cluster_nb_sectors);
1667
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001668 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1669 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001670
1671 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1672 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1673 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1674
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001675 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1676 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001677 if (ret < 0) {
1678 goto err;
1679 }
1680
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001681 if (drv->bdrv_co_write_zeroes &&
1682 buffer_is_zero(bounce_buffer, iov.iov_len)) {
1683 ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
1684 cluster_nb_sectors);
1685 } else {
1686 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001687 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001688 }
1689
Stefan Hajnocziab185922011-11-17 13:40:31 +00001690 if (ret < 0) {
1691 /* It might be okay to ignore write errors for guest requests. If this
1692 * is a deliberate copy-on-read then we don't want to ignore the error.
1693 * Simply report it in all cases.
1694 */
1695 goto err;
1696 }
1697
1698 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1699 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1700 nb_sectors * BDRV_SECTOR_SIZE);
1701
1702err:
1703 qemu_vfree(bounce_buffer);
1704 return ret;
1705}
1706
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001707/*
1708 * Handle a read request in coroutine context
1709 */
1710static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001711 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1712 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001713{
1714 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001715 BdrvTrackedRequest req;
1716 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001717
Kevin Wolfda1fa912011-07-14 17:27:13 +02001718 if (!drv) {
1719 return -ENOMEDIUM;
1720 }
1721 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1722 return -EIO;
1723 }
1724
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001725 /* throttling disk read I/O */
1726 if (bs->io_limits_enabled) {
1727 bdrv_io_limits_intercept(bs, false, nb_sectors);
1728 }
1729
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001730 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001731 flags |= BDRV_REQ_COPY_ON_READ;
1732 }
1733 if (flags & BDRV_REQ_COPY_ON_READ) {
1734 bs->copy_on_read_in_flight++;
1735 }
1736
1737 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001738 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1739 }
1740
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001741 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001742
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001743 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001744 int pnum;
1745
1746 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1747 if (ret < 0) {
1748 goto out;
1749 }
1750
1751 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001752 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001753 goto out;
1754 }
1755 }
1756
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001757 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001758
1759out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001760 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001761
1762 if (flags & BDRV_REQ_COPY_ON_READ) {
1763 bs->copy_on_read_in_flight--;
1764 }
1765
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001766 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001767}
1768
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001769int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001770 int nb_sectors, QEMUIOVector *qiov)
1771{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001772 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001773
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001774 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1775}
1776
1777int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1778 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1779{
1780 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1781
1782 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1783 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001784}
1785
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001786static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1787 int64_t sector_num, int nb_sectors)
1788{
1789 BlockDriver *drv = bs->drv;
1790 QEMUIOVector qiov;
1791 struct iovec iov;
1792 int ret;
1793
1794 /* First try the efficient write zeroes operation */
1795 if (drv->bdrv_co_write_zeroes) {
1796 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1797 }
1798
1799 /* Fall back to bounce buffer if write zeroes is unsupported */
1800 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1801 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1802 memset(iov.iov_base, 0, iov.iov_len);
1803 qemu_iovec_init_external(&qiov, &iov, 1);
1804
1805 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1806
1807 qemu_vfree(iov.iov_base);
1808 return ret;
1809}
1810
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001811/*
1812 * Handle a write request in coroutine context
1813 */
1814static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001815 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1816 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001817{
1818 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001819 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001820 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001821
1822 if (!bs->drv) {
1823 return -ENOMEDIUM;
1824 }
1825 if (bs->read_only) {
1826 return -EACCES;
1827 }
1828 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1829 return -EIO;
1830 }
1831
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001832 /* throttling disk write I/O */
1833 if (bs->io_limits_enabled) {
1834 bdrv_io_limits_intercept(bs, true, nb_sectors);
1835 }
1836
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001837 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001838 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1839 }
1840
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001841 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1842
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001843 if (flags & BDRV_REQ_ZERO_WRITE) {
1844 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1845 } else {
1846 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1847 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001848
Kevin Wolfda1fa912011-07-14 17:27:13 +02001849 if (bs->dirty_bitmap) {
1850 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1851 }
1852
1853 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1854 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1855 }
1856
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001857 tracked_request_end(&req);
1858
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001859 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001860}
1861
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001862int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1863 int nb_sectors, QEMUIOVector *qiov)
1864{
1865 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1866
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001867 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1868}
1869
1870int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1871 int64_t sector_num, int nb_sectors)
1872{
1873 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1874
1875 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1876 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001877}
1878
bellard83f64092006-08-01 16:21:11 +00001879/**
bellard83f64092006-08-01 16:21:11 +00001880 * Truncate file to 'offset' bytes (needed only for file protocols)
1881 */
1882int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1883{
1884 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001885 int ret;
bellard83f64092006-08-01 16:21:11 +00001886 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001887 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001888 if (!drv->bdrv_truncate)
1889 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001890 if (bs->read_only)
1891 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001892 if (bdrv_in_use(bs))
1893 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001894 ret = drv->bdrv_truncate(bs, offset);
1895 if (ret == 0) {
1896 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001897 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001898 }
1899 return ret;
bellard83f64092006-08-01 16:21:11 +00001900}
1901
1902/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001903 * Length of a allocated file in bytes. Sparse files are counted by actual
1904 * allocated space. Return < 0 if error or unknown.
1905 */
1906int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1907{
1908 BlockDriver *drv = bs->drv;
1909 if (!drv) {
1910 return -ENOMEDIUM;
1911 }
1912 if (drv->bdrv_get_allocated_file_size) {
1913 return drv->bdrv_get_allocated_file_size(bs);
1914 }
1915 if (bs->file) {
1916 return bdrv_get_allocated_file_size(bs->file);
1917 }
1918 return -ENOTSUP;
1919}
1920
1921/**
bellard83f64092006-08-01 16:21:11 +00001922 * Length of a file in bytes. Return < 0 if error or unknown.
1923 */
1924int64_t bdrv_getlength(BlockDriverState *bs)
1925{
1926 BlockDriver *drv = bs->drv;
1927 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001928 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001929
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001930 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001931 if (drv->bdrv_getlength) {
1932 return drv->bdrv_getlength(bs);
1933 }
bellard83f64092006-08-01 16:21:11 +00001934 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001935 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001936}
1937
bellard19cb3732006-08-19 11:45:59 +00001938/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001939void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001940{
bellard19cb3732006-08-19 11:45:59 +00001941 int64_t length;
1942 length = bdrv_getlength(bs);
1943 if (length < 0)
1944 length = 0;
1945 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001946 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001947 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001948}
bellardcf989512004-02-16 21:56:36 +00001949
/* One entry of an MS-DOS partition table, laid out as stored on disk */
struct partition {
    uint8_t boot_ind;       /* boot indicator; 0x80 means active */
    uint8_t head;           /* start of partition: head */
    uint8_t sector;         /* start of partition: sector */
    uint8_t cyl;            /* start of partition: cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* end of partition: head */
    uint8_t end_sector;     /* end of partition: sector */
    uint8_t end_cyl;        /* end of partition: cylinder */
    uint32_t start_sect;    /* first sector, counted from 0 */
    uint32_t nr_sects;      /* number of sectors in the partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001962
1963/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1964static int guess_disk_lchs(BlockDriverState *bs,
1965 int *pcylinders, int *pheads, int *psectors)
1966{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001967 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001968 int ret, i, heads, sectors, cylinders;
1969 struct partition *p;
1970 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001971 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001972
1973 bdrv_get_geometry(bs, &nb_sectors);
1974
1975 ret = bdrv_read(bs, 0, buf, 1);
1976 if (ret < 0)
1977 return -1;
1978 /* test msdos magic */
1979 if (buf[510] != 0x55 || buf[511] != 0xaa)
1980 return -1;
1981 for(i = 0; i < 4; i++) {
1982 p = ((struct partition *)(buf + 0x1be)) + i;
1983 nr_sects = le32_to_cpu(p->nr_sects);
1984 if (nr_sects && p->end_head) {
1985 /* We make the assumption that the partition terminates on
1986 a cylinder boundary */
1987 heads = p->end_head + 1;
1988 sectors = p->end_sector & 63;
1989 if (sectors == 0)
1990 continue;
1991 cylinders = nb_sectors / (heads * sectors);
1992 if (cylinders < 1 || cylinders > 16383)
1993 continue;
1994 *pheads = heads;
1995 *psectors = sectors;
1996 *pcylinders = cylinders;
1997#if 0
1998 printf("guessed geometry: LCHS=%d %d %d\n",
1999 cylinders, heads, sectors);
2000#endif
2001 return 0;
2002 }
2003 }
2004 return -1;
2005}
2006
2007void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2008{
2009 int translation, lba_detected = 0;
2010 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002011 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002012
2013 /* if a geometry hint is available, use it */
2014 bdrv_get_geometry(bs, &nb_sectors);
2015 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2016 translation = bdrv_get_translation_hint(bs);
2017 if (cylinders != 0) {
2018 *pcyls = cylinders;
2019 *pheads = heads;
2020 *psecs = secs;
2021 } else {
2022 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2023 if (heads > 16) {
2024 /* if heads > 16, it means that a BIOS LBA
2025 translation was active, so the default
2026 hardware geometry is OK */
2027 lba_detected = 1;
2028 goto default_geometry;
2029 } else {
2030 *pcyls = cylinders;
2031 *pheads = heads;
2032 *psecs = secs;
2033 /* disable any translation to be in sync with
2034 the logical geometry */
2035 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2036 bdrv_set_translation_hint(bs,
2037 BIOS_ATA_TRANSLATION_NONE);
2038 }
2039 }
2040 } else {
2041 default_geometry:
2042 /* if no geometry, use a standard physical disk geometry */
2043 cylinders = nb_sectors / (16 * 63);
2044
2045 if (cylinders > 16383)
2046 cylinders = 16383;
2047 else if (cylinders < 2)
2048 cylinders = 2;
2049 *pcyls = cylinders;
2050 *pheads = 16;
2051 *psecs = 63;
2052 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2053 if ((*pcyls * *pheads) <= 131072) {
2054 bdrv_set_translation_hint(bs,
2055 BIOS_ATA_TRANSLATION_LARGE);
2056 } else {
2057 bdrv_set_translation_hint(bs,
2058 BIOS_ATA_TRANSLATION_LBA);
2059 }
2060 }
2061 }
2062 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2063 }
2064}
2065
ths5fafdf22007-09-16 21:08:06 +00002066void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002067 int cyls, int heads, int secs)
2068{
2069 bs->cyls = cyls;
2070 bs->heads = heads;
2071 bs->secs = secs;
2072}
2073
bellard46d47672004-11-16 01:45:27 +00002074void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2075{
2076 bs->translation = translation;
2077}
2078
ths5fafdf22007-09-16 21:08:06 +00002079void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002080 int *pcyls, int *pheads, int *psecs)
2081{
2082 *pcyls = bs->cyls;
2083 *pheads = bs->heads;
2084 *psecs = bs->secs;
2085}
2086
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002087/* throttling disk io limits */
2088void bdrv_set_io_limits(BlockDriverState *bs,
2089 BlockIOLimit *io_limits)
2090{
2091 bs->io_limits = *io_limits;
2092 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2093}
2094
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002095/* Recognize floppy formats */
2096typedef struct FDFormat {
2097 FDriveType drive;
2098 uint8_t last_sect;
2099 uint8_t max_track;
2100 uint8_t max_head;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002101 FDriveRate rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002102} FDFormat;
2103
2104static const FDFormat fd_formats[] = {
2105 /* First entry is default format */
2106 /* 1.44 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002107 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2108 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2109 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2110 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2111 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2112 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2113 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2114 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002115 /* 2.88 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002116 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2117 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2118 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2119 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2120 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002121 /* 720 kB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002122 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2123 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2124 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2125 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2126 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2127 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002128 /* 1.2 MB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002129 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2130 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2131 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2132 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2133 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002134 /* 720 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002135 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2136 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002137 /* 360 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002138 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2139 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2140 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2141 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002142 /* 320 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002143 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2144 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002145 /* 360 kB must match 5"1/4 better than 3"1/2... */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002146 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002147 /* end */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002148 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002149};
2150
2151void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2152 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002153 FDriveType drive_in, FDriveType *drive,
2154 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002155{
2156 const FDFormat *parse;
2157 uint64_t nb_sectors, size;
2158 int i, first_match, match;
2159
2160 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2161 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2162 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002163 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002164 } else {
2165 bdrv_get_geometry(bs, &nb_sectors);
2166 match = -1;
2167 first_match = -1;
2168 for (i = 0; ; i++) {
2169 parse = &fd_formats[i];
2170 if (parse->drive == FDRIVE_DRV_NONE) {
2171 break;
2172 }
2173 if (drive_in == parse->drive ||
2174 drive_in == FDRIVE_DRV_NONE) {
2175 size = (parse->max_head + 1) * parse->max_track *
2176 parse->last_sect;
2177 if (nb_sectors == size) {
2178 match = i;
2179 break;
2180 }
2181 if (first_match == -1) {
2182 first_match = i;
2183 }
2184 }
2185 }
2186 if (match == -1) {
2187 if (first_match == -1) {
2188 match = 1;
2189 } else {
2190 match = first_match;
2191 }
2192 parse = &fd_formats[match];
2193 }
2194 *nb_heads = parse->max_head + 1;
2195 *max_track = parse->max_track;
2196 *last_sect = parse->last_sect;
2197 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002198 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002199 }
2200}
2201
bellard46d47672004-11-16 01:45:27 +00002202int bdrv_get_translation_hint(BlockDriverState *bs)
2203{
2204 return bs->translation;
2205}
2206
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002207void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2208 BlockErrorAction on_write_error)
2209{
2210 bs->on_read_error = on_read_error;
2211 bs->on_write_error = on_write_error;
2212}
2213
2214BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2215{
2216 return is_read ? bs->on_read_error : bs->on_write_error;
2217}
2218
bellardb3380822004-03-14 21:38:54 +00002219int bdrv_is_read_only(BlockDriverState *bs)
2220{
2221 return bs->read_only;
2222}
2223
ths985a03b2007-12-24 16:10:43 +00002224int bdrv_is_sg(BlockDriverState *bs)
2225{
2226 return bs->sg;
2227}
2228
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002229int bdrv_enable_write_cache(BlockDriverState *bs)
2230{
2231 return bs->enable_write_cache;
2232}
2233
bellardea2384d2004-08-01 21:59:26 +00002234int bdrv_is_encrypted(BlockDriverState *bs)
2235{
2236 if (bs->backing_hd && bs->backing_hd->encrypted)
2237 return 1;
2238 return bs->encrypted;
2239}
2240
aliguoric0f4ce72009-03-05 23:01:01 +00002241int bdrv_key_required(BlockDriverState *bs)
2242{
2243 BlockDriverState *backing_hd = bs->backing_hd;
2244
2245 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2246 return 1;
2247 return (bs->encrypted && !bs->valid_key);
2248}
2249
bellardea2384d2004-08-01 21:59:26 +00002250int bdrv_set_key(BlockDriverState *bs, const char *key)
2251{
2252 int ret;
2253 if (bs->backing_hd && bs->backing_hd->encrypted) {
2254 ret = bdrv_set_key(bs->backing_hd, key);
2255 if (ret < 0)
2256 return ret;
2257 if (!bs->encrypted)
2258 return 0;
2259 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002260 if (!bs->encrypted) {
2261 return -EINVAL;
2262 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2263 return -ENOMEDIUM;
2264 }
aliguoric0f4ce72009-03-05 23:01:01 +00002265 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002266 if (ret < 0) {
2267 bs->valid_key = 0;
2268 } else if (!bs->valid_key) {
2269 bs->valid_key = 1;
2270 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002271 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002272 }
aliguoric0f4ce72009-03-05 23:01:01 +00002273 return ret;
bellardea2384d2004-08-01 21:59:26 +00002274}
2275
2276void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2277{
bellard19cb3732006-08-19 11:45:59 +00002278 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002279 buf[0] = '\0';
2280 } else {
2281 pstrcpy(buf, buf_size, bs->drv->format_name);
2282 }
2283}
2284
ths5fafdf22007-09-16 21:08:06 +00002285void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002286 void *opaque)
2287{
2288 BlockDriver *drv;
2289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002290 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002291 it(opaque, drv->format_name);
2292 }
2293}
2294
bellardb3380822004-03-14 21:38:54 +00002295BlockDriverState *bdrv_find(const char *name)
2296{
2297 BlockDriverState *bs;
2298
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002299 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2300 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002301 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002302 }
bellardb3380822004-03-14 21:38:54 +00002303 }
2304 return NULL;
2305}
2306
Markus Armbruster2f399b02010-06-02 18:55:20 +02002307BlockDriverState *bdrv_next(BlockDriverState *bs)
2308{
2309 if (!bs) {
2310 return QTAILQ_FIRST(&bdrv_states);
2311 }
2312 return QTAILQ_NEXT(bs, list);
2313}
2314
aliguori51de9762009-03-05 23:00:43 +00002315void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002316{
2317 BlockDriverState *bs;
2318
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002319 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002320 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002321 }
2322}
2323
bellardea2384d2004-08-01 21:59:26 +00002324const char *bdrv_get_device_name(BlockDriverState *bs)
2325{
2326 return bs->device_name;
2327}
2328
aliguoric6ca28d2008-10-06 13:55:43 +00002329void bdrv_flush_all(void)
2330{
2331 BlockDriverState *bs;
2332
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002333 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002334 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002335 }
aliguoric6ca28d2008-10-06 13:55:43 +00002336}
2337
Kevin Wolff2feebb2010-04-14 17:30:35 +02002338int bdrv_has_zero_init(BlockDriverState *bs)
2339{
2340 assert(bs->drv);
2341
Kevin Wolf336c1c12010-07-28 11:26:29 +02002342 if (bs->drv->bdrv_has_zero_init) {
2343 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002344 }
2345
2346 return 1;
2347}
2348
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002349typedef struct BdrvCoIsAllocatedData {
2350 BlockDriverState *bs;
2351 int64_t sector_num;
2352 int nb_sectors;
2353 int *pnum;
2354 int ret;
2355 bool done;
2356} BdrvCoIsAllocatedData;
2357
thsf58c7b32008-06-05 21:53:49 +00002358/*
2359 * Returns true iff the specified sector is present in the disk image. Drivers
2360 * not implementing the functionality are assumed to not support backing files,
2361 * hence all their sectors are reported as allocated.
2362 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002363 * If 'sector_num' is beyond the end of the disk image the return value is 0
2364 * and 'pnum' is set to 0.
2365 *
thsf58c7b32008-06-05 21:53:49 +00002366 * 'pnum' is set to the number of sectors (including and immediately following
2367 * the specified sector) that are known to be in the same
2368 * allocated/unallocated state.
2369 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002370 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2371 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002372 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002373int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2374 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002375{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002376 int64_t n;
2377
2378 if (sector_num >= bs->total_sectors) {
2379 *pnum = 0;
2380 return 0;
2381 }
2382
2383 n = bs->total_sectors - sector_num;
2384 if (n < nb_sectors) {
2385 nb_sectors = n;
2386 }
2387
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002388 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002389 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002390 return 1;
2391 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002392
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002393 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2394}
2395
2396/* Coroutine wrapper for bdrv_is_allocated() */
2397static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2398{
2399 BdrvCoIsAllocatedData *data = opaque;
2400 BlockDriverState *bs = data->bs;
2401
2402 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2403 data->pnum);
2404 data->done = true;
2405}
2406
2407/*
2408 * Synchronous wrapper around bdrv_co_is_allocated().
2409 *
2410 * See bdrv_co_is_allocated() for details.
2411 */
2412int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2413 int *pnum)
2414{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002415 Coroutine *co;
2416 BdrvCoIsAllocatedData data = {
2417 .bs = bs,
2418 .sector_num = sector_num,
2419 .nb_sectors = nb_sectors,
2420 .pnum = pnum,
2421 .done = false,
2422 };
2423
2424 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2425 qemu_coroutine_enter(co, &data);
2426 while (!data.done) {
2427 qemu_aio_wait();
2428 }
2429 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002430}
2431
Luiz Capitulinob2023812011-09-21 17:16:47 -03002432BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002433{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002434 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002435 BlockDriverState *bs;
2436
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002437 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002438 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002439
Luiz Capitulinob2023812011-09-21 17:16:47 -03002440 info->value = g_malloc0(sizeof(*info->value));
2441 info->value->device = g_strdup(bs->device_name);
2442 info->value->type = g_strdup("unknown");
2443 info->value->locked = bdrv_dev_is_medium_locked(bs);
2444 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002445
Markus Armbrustere4def802011-09-06 18:58:53 +02002446 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002447 info->value->has_tray_open = true;
2448 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002449 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002450
2451 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002452 info->value->has_io_status = true;
2453 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002454 }
2455
bellard19cb3732006-08-19 11:45:59 +00002456 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002457 info->value->has_inserted = true;
2458 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2459 info->value->inserted->file = g_strdup(bs->filename);
2460 info->value->inserted->ro = bs->read_only;
2461 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2462 info->value->inserted->encrypted = bs->encrypted;
2463 if (bs->backing_file[0]) {
2464 info->value->inserted->has_backing_file = true;
2465 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002466 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002467
2468 if (bs->io_limits_enabled) {
2469 info->value->inserted->bps =
2470 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2471 info->value->inserted->bps_rd =
2472 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2473 info->value->inserted->bps_wr =
2474 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2475 info->value->inserted->iops =
2476 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2477 info->value->inserted->iops_rd =
2478 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2479 info->value->inserted->iops_wr =
2480 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2481 }
bellardb3380822004-03-14 21:38:54 +00002482 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002483
2484 /* XXX: waiting for the qapi to support GSList */
2485 if (!cur_item) {
2486 head = cur_item = info;
2487 } else {
2488 cur_item->next = info;
2489 cur_item = info;
2490 }
bellardb3380822004-03-14 21:38:54 +00002491 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002492
Luiz Capitulinob2023812011-09-21 17:16:47 -03002493 return head;
bellardb3380822004-03-14 21:38:54 +00002494}
thsa36e69d2007-12-02 05:18:19 +00002495
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002496/* Consider exposing this as a full fledged QMP command */
2497static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002498{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002499 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002500
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002501 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002502
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002503 if (bs->device_name[0]) {
2504 s->has_device = true;
2505 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002506 }
2507
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002508 s->stats = g_malloc0(sizeof(*s->stats));
2509 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2510 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2511 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2512 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2513 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2514 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2515 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2516 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2517 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2518
Kevin Wolf294cc352010-04-28 14:34:01 +02002519 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002520 s->has_parent = true;
2521 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002522 }
2523
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002524 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002525}
2526
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002527BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002528{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002529 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002530 BlockDriverState *bs;
2531
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002532 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002533 BlockStatsList *info = g_malloc0(sizeof(*info));
2534 info->value = qmp_query_blockstat(bs, NULL);
2535
2536 /* XXX: waiting for the qapi to support GSList */
2537 if (!cur_item) {
2538 head = cur_item = info;
2539 } else {
2540 cur_item->next = info;
2541 cur_item = info;
2542 }
thsa36e69d2007-12-02 05:18:19 +00002543 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002544
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002545 return head;
thsa36e69d2007-12-02 05:18:19 +00002546}
bellardea2384d2004-08-01 21:59:26 +00002547
aliguori045df332009-03-05 23:00:48 +00002548const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2549{
2550 if (bs->backing_hd && bs->backing_hd->encrypted)
2551 return bs->backing_file;
2552 else if (bs->encrypted)
2553 return bs->filename;
2554 else
2555 return NULL;
2556}
2557
ths5fafdf22007-09-16 21:08:06 +00002558void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002559 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002560{
Kevin Wolf3574c602011-10-26 11:02:11 +02002561 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002562}
2563
ths5fafdf22007-09-16 21:08:06 +00002564int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002565 const uint8_t *buf, int nb_sectors)
2566{
2567 BlockDriver *drv = bs->drv;
2568 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002569 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002570 if (!drv->bdrv_write_compressed)
2571 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002572 if (bdrv_check_request(bs, sector_num, nb_sectors))
2573 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002574
Jan Kiszkac6d22832009-11-30 18:21:20 +01002575 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002576 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2577 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002578
bellardfaea38e2006-08-05 21:31:00 +00002579 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2580}
ths3b46e622007-09-17 08:09:54 +00002581
bellardfaea38e2006-08-05 21:31:00 +00002582int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2583{
2584 BlockDriver *drv = bs->drv;
2585 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002586 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002587 if (!drv->bdrv_get_info)
2588 return -ENOTSUP;
2589 memset(bdi, 0, sizeof(*bdi));
2590 return drv->bdrv_get_info(bs, bdi);
2591}
2592
Christoph Hellwig45566e92009-07-10 23:11:57 +02002593int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2594 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002595{
2596 BlockDriver *drv = bs->drv;
2597 if (!drv)
2598 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002599 if (drv->bdrv_save_vmstate)
2600 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2601 if (bs->file)
2602 return bdrv_save_vmstate(bs->file, buf, pos, size);
2603 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002604}
2605
Christoph Hellwig45566e92009-07-10 23:11:57 +02002606int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2607 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002608{
2609 BlockDriver *drv = bs->drv;
2610 if (!drv)
2611 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002612 if (drv->bdrv_load_vmstate)
2613 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2614 if (bs->file)
2615 return bdrv_load_vmstate(bs->file, buf, pos, size);
2616 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002617}
2618
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002619void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2620{
2621 BlockDriver *drv = bs->drv;
2622
2623 if (!drv || !drv->bdrv_debug_event) {
2624 return;
2625 }
2626
2627 return drv->bdrv_debug_event(bs, event);
2628
2629}
2630
/**************************************************************/
/* handling of snapshots */
2633
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002634int bdrv_can_snapshot(BlockDriverState *bs)
2635{
2636 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002637 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002638 return 0;
2639 }
2640
2641 if (!drv->bdrv_snapshot_create) {
2642 if (bs->file != NULL) {
2643 return bdrv_can_snapshot(bs->file);
2644 }
2645 return 0;
2646 }
2647
2648 return 1;
2649}
2650
Blue Swirl199630b2010-07-25 20:49:34 +00002651int bdrv_is_snapshot(BlockDriverState *bs)
2652{
2653 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2654}
2655
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002656BlockDriverState *bdrv_snapshots(void)
2657{
2658 BlockDriverState *bs;
2659
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002660 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002661 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002662 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002663
2664 bs = NULL;
2665 while ((bs = bdrv_next(bs))) {
2666 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002667 bs_snapshots = bs;
2668 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002669 }
2670 }
2671 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002672}
2673
ths5fafdf22007-09-16 21:08:06 +00002674int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002675 QEMUSnapshotInfo *sn_info)
2676{
2677 BlockDriver *drv = bs->drv;
2678 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002679 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002680 if (drv->bdrv_snapshot_create)
2681 return drv->bdrv_snapshot_create(bs, sn_info);
2682 if (bs->file)
2683 return bdrv_snapshot_create(bs->file, sn_info);
2684 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002685}
2686
ths5fafdf22007-09-16 21:08:06 +00002687int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002688 const char *snapshot_id)
2689{
2690 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002691 int ret, open_ret;
2692
bellardfaea38e2006-08-05 21:31:00 +00002693 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002694 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002695 if (drv->bdrv_snapshot_goto)
2696 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2697
2698 if (bs->file) {
2699 drv->bdrv_close(bs);
2700 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2701 open_ret = drv->bdrv_open(bs, bs->open_flags);
2702 if (open_ret < 0) {
2703 bdrv_delete(bs->file);
2704 bs->drv = NULL;
2705 return open_ret;
2706 }
2707 return ret;
2708 }
2709
2710 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002711}
2712
2713int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2714{
2715 BlockDriver *drv = bs->drv;
2716 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002717 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002718 if (drv->bdrv_snapshot_delete)
2719 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2720 if (bs->file)
2721 return bdrv_snapshot_delete(bs->file, snapshot_id);
2722 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002723}
2724
ths5fafdf22007-09-16 21:08:06 +00002725int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002726 QEMUSnapshotInfo **psn_info)
2727{
2728 BlockDriver *drv = bs->drv;
2729 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002730 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002731 if (drv->bdrv_snapshot_list)
2732 return drv->bdrv_snapshot_list(bs, psn_info);
2733 if (bs->file)
2734 return bdrv_snapshot_list(bs->file, psn_info);
2735 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002736}
2737
edison51ef6722010-09-21 19:58:41 -07002738int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2739 const char *snapshot_name)
2740{
2741 BlockDriver *drv = bs->drv;
2742 if (!drv) {
2743 return -ENOMEDIUM;
2744 }
2745 if (!bs->read_only) {
2746 return -EINVAL;
2747 }
2748 if (drv->bdrv_snapshot_load_tmp) {
2749 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2750 }
2751 return -ENOTSUP;
2752}
2753
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002754BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2755 const char *backing_file)
2756{
2757 if (!bs->drv) {
2758 return NULL;
2759 }
2760
2761 if (bs->backing_hd) {
2762 if (strcmp(bs->backing_file, backing_file) == 0) {
2763 return bs->backing_hd;
2764 } else {
2765 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2766 }
2767 }
2768
2769 return NULL;
2770}
2771
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') in a short human readable
 * form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and get a K, M, G or T suffix: one decimal place while
 * the scaled value is below 10, a rounded integer otherwise. Anything too
 * large for the other units is printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2801
2802char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2803{
2804 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002805#ifdef _WIN32
2806 struct tm *ptm;
2807#else
bellardfaea38e2006-08-05 21:31:00 +00002808 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002809#endif
bellardfaea38e2006-08-05 21:31:00 +00002810 time_t ti;
2811 int64_t secs;
2812
2813 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002814 snprintf(buf, buf_size,
2815 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002816 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2817 } else {
2818 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002819#ifdef _WIN32
2820 ptm = localtime(&ti);
2821 strftime(date_buf, sizeof(date_buf),
2822 "%Y-%m-%d %H:%M:%S", ptm);
2823#else
bellardfaea38e2006-08-05 21:31:00 +00002824 localtime_r(&ti, &tm);
2825 strftime(date_buf, sizeof(date_buf),
2826 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002827#endif
bellardfaea38e2006-08-05 21:31:00 +00002828 secs = sn->vm_clock_nsec / 1000000000;
2829 snprintf(clock_buf, sizeof(clock_buf),
2830 "%02d:%02d:%02d.%03d",
2831 (int)(secs / 3600),
2832 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002833 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002834 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2835 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002836 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002837 sn->id_str, sn->name,
2838 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2839 date_buf,
2840 clock_buf);
2841 }
2842 return buf;
2843}
2844
/**************************************************************/
/* async I/Os */
2847
aliguori3b69e4b2009-01-22 16:59:24 +00002848BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002849 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002850 BlockDriverCompletionFunc *cb, void *opaque)
2851{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002852 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2853
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002854 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002855 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002856}
2857
aliguorif141eaf2009-04-07 18:43:24 +00002858BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2859 QEMUIOVector *qiov, int nb_sectors,
2860 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002861{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002862 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2863
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002864 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002865 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002866}
2867
Kevin Wolf40b4f532009-09-09 17:53:37 +02002868
/*
 * Completion state shared by all requests submitted through one
 * bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    int error;          /* first negative completion status seen, else 0 */
    int num_requests;   /* submitted requests that have not completed yet */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy and free
                                         * once cb has been called, or NULL */
    } callbacks[];
} MultiwriteCB;
2879
2880static void multiwrite_user_cb(MultiwriteCB *mcb)
2881{
2882 int i;
2883
2884 for (i = 0; i < mcb->num_callbacks; i++) {
2885 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002886 if (mcb->callbacks[i].free_qiov) {
2887 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2888 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002889 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002890 }
2891}
2892
2893static void multiwrite_cb(void *opaque, int ret)
2894{
2895 MultiwriteCB *mcb = opaque;
2896
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002897 trace_multiwrite_cb(mcb, ret);
2898
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002899 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002900 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002901 }
2902
2903 mcb->num_requests--;
2904 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002905 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002906 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002907 }
2908}
2909
2910static int multiwrite_req_compare(const void *a, const void *b)
2911{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002912 const BlockRequest *req1 = a, *req2 = b;
2913
2914 /*
2915 * Note that we can't simply subtract req2->sector from req1->sector
2916 * here as that could overflow the return value.
2917 */
2918 if (req1->sector > req2->sector) {
2919 return 1;
2920 } else if (req1->sector < req2->sector) {
2921 return -1;
2922 } else {
2923 return 0;
2924 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002925}
2926
2927/*
2928 * Takes a bunch of requests and tries to merge them. Returns the number of
2929 * requests that remain after merging.
2930 */
2931static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2932 int num_reqs, MultiwriteCB *mcb)
2933{
2934 int i, outidx;
2935
2936 // Sort requests by start sector
2937 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2938
2939 // Check if adjacent requests touch the same clusters. If so, combine them,
2940 // filling up gaps with zero sectors.
2941 outidx = 0;
2942 for (i = 1; i < num_reqs; i++) {
2943 int merge = 0;
2944 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2945
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002946 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02002947 if (reqs[i].sector <= oldreq_last) {
2948 merge = 1;
2949 }
2950
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002951 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2952 merge = 0;
2953 }
2954
Kevin Wolf40b4f532009-09-09 17:53:37 +02002955 if (merge) {
2956 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002957 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002958 qemu_iovec_init(qiov,
2959 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2960
2961 // Add the first request to the merged one. If the requests are
2962 // overlapping, drop the last sectors of the first request.
2963 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2964 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2965
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002966 // We should need to add any zeros between the two requests
2967 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002968
2969 // Add the second request
2970 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2971
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002972 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002973 reqs[outidx].qiov = qiov;
2974
2975 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2976 } else {
2977 outidx++;
2978 reqs[outidx].sector = reqs[i].sector;
2979 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2980 reqs[outidx].qiov = reqs[i].qiov;
2981 }
2982 }
2983
2984 return outidx + 1;
2985}
2986
2987/*
2988 * Submit multiple AIO write requests at once.
2989 *
2990 * On success, the function returns 0 and all requests in the reqs array have
2991 * been submitted. In error case this function returns -1, and any of the
2992 * requests may or may not be submitted yet. In particular, this means that the
2993 * callback will be called for some of the requests, for others it won't. The
2994 * caller must check the error field of the BlockRequest to wait for the right
2995 * callbacks (if error != 0, no callback will be called).
2996 *
2997 * The implementation may modify the contents of the reqs array, e.g. to merge
2998 * requests. However, the fields opaque and error are left unmodified as they
2999 * are used to signal failure for a single request to the caller.
3000 */
3001int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3002{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003003 MultiwriteCB *mcb;
3004 int i;
3005
Ryan Harper301db7c2011-03-07 10:01:04 -06003006 /* don't submit writes if we don't have a medium */
3007 if (bs->drv == NULL) {
3008 for (i = 0; i < num_reqs; i++) {
3009 reqs[i].error = -ENOMEDIUM;
3010 }
3011 return -1;
3012 }
3013
Kevin Wolf40b4f532009-09-09 17:53:37 +02003014 if (num_reqs == 0) {
3015 return 0;
3016 }
3017
3018 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003019 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003020 mcb->num_requests = 0;
3021 mcb->num_callbacks = num_reqs;
3022
3023 for (i = 0; i < num_reqs; i++) {
3024 mcb->callbacks[i].cb = reqs[i].cb;
3025 mcb->callbacks[i].opaque = reqs[i].opaque;
3026 }
3027
3028 // Check for mergable requests
3029 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3030
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003031 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3032
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003033 /* Run the aio requests. */
3034 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003035 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003036 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003037 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003038 }
3039
3040 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003041}
3042
bellard83f64092006-08-01 16:21:11 +00003043void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003044{
aliguori6bbff9a2009-03-20 18:25:59 +00003045 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003046}
3047
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003048/* block I/O throttling */
3049static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3050 bool is_write, double elapsed_time, uint64_t *wait)
3051{
3052 uint64_t bps_limit = 0;
3053 double bytes_limit, bytes_base, bytes_res;
3054 double slice_time, wait_time;
3055
3056 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3057 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3058 } else if (bs->io_limits.bps[is_write]) {
3059 bps_limit = bs->io_limits.bps[is_write];
3060 } else {
3061 if (wait) {
3062 *wait = 0;
3063 }
3064
3065 return false;
3066 }
3067
3068 slice_time = bs->slice_end - bs->slice_start;
3069 slice_time /= (NANOSECONDS_PER_SECOND);
3070 bytes_limit = bps_limit * slice_time;
3071 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3072 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3073 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3074 }
3075
3076 /* bytes_base: the bytes of data which have been read/written; and
3077 * it is obtained from the history statistic info.
3078 * bytes_res: the remaining bytes of data which need to be read/written.
3079 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3080 * the total time for completing reading/writting all data.
3081 */
3082 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3083
3084 if (bytes_base + bytes_res <= bytes_limit) {
3085 if (wait) {
3086 *wait = 0;
3087 }
3088
3089 return false;
3090 }
3091
3092 /* Calc approx time to dispatch */
3093 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3094
3095 /* When the I/O rate at runtime exceeds the limits,
3096 * bs->slice_end need to be extended in order that the current statistic
3097 * info can be kept until the timer fire, so it is increased and tuned
3098 * based on the result of experiment.
3099 */
3100 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3101 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3102 if (wait) {
3103 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3104 }
3105
3106 return true;
3107}
3108
3109static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3110 double elapsed_time, uint64_t *wait)
3111{
3112 uint64_t iops_limit = 0;
3113 double ios_limit, ios_base;
3114 double slice_time, wait_time;
3115
3116 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3117 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3118 } else if (bs->io_limits.iops[is_write]) {
3119 iops_limit = bs->io_limits.iops[is_write];
3120 } else {
3121 if (wait) {
3122 *wait = 0;
3123 }
3124
3125 return false;
3126 }
3127
3128 slice_time = bs->slice_end - bs->slice_start;
3129 slice_time /= (NANOSECONDS_PER_SECOND);
3130 ios_limit = iops_limit * slice_time;
3131 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3132 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3133 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3134 }
3135
3136 if (ios_base + 1 <= ios_limit) {
3137 if (wait) {
3138 *wait = 0;
3139 }
3140
3141 return false;
3142 }
3143
3144 /* Calc approx time to dispatch */
3145 wait_time = (ios_base + 1) / iops_limit;
3146 if (wait_time > elapsed_time) {
3147 wait_time = wait_time - elapsed_time;
3148 } else {
3149 wait_time = 0;
3150 }
3151
3152 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3153 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3154 if (wait) {
3155 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3156 }
3157
3158 return true;
3159}
3160
3161static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3162 bool is_write, int64_t *wait)
3163{
3164 int64_t now, max_wait;
3165 uint64_t bps_wait = 0, iops_wait = 0;
3166 double elapsed_time;
3167 int bps_ret, iops_ret;
3168
3169 now = qemu_get_clock_ns(vm_clock);
3170 if ((bs->slice_start < now)
3171 && (bs->slice_end > now)) {
3172 bs->slice_end = now + bs->slice_time;
3173 } else {
3174 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3175 bs->slice_start = now;
3176 bs->slice_end = now + bs->slice_time;
3177
3178 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3179 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3180
3181 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3182 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3183 }
3184
3185 elapsed_time = now - bs->slice_start;
3186 elapsed_time /= (NANOSECONDS_PER_SECOND);
3187
3188 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3189 is_write, elapsed_time, &bps_wait);
3190 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3191 elapsed_time, &iops_wait);
3192 if (bps_ret || iops_ret) {
3193 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3194 if (wait) {
3195 *wait = max_wait;
3196 }
3197
3198 now = qemu_get_clock_ns(vm_clock);
3199 if (bs->slice_end < now + max_wait) {
3200 bs->slice_end = now + max_wait;
3201 }
3202
3203 return true;
3204 }
3205
3206 if (wait) {
3207 *wait = 0;
3208 }
3209
3210 return false;
3211}
pbrookce1a14d2006-08-07 02:38:06 +00003212
/**************************************************************/
/* async block device emulation */
3215
/*
 * AIOCB used to emulate asynchronous I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write callbacks: the I/O is done immediately through a
 * bounce buffer and completion is reported later from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;         /* bottom half that delivers the completion */
    int ret;            /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov; /* caller's I/O vector */
    uint8_t *bounce;    /* linear buffer the driver reads into/writes from */
    int is_write;       /* non-zero for writes, zero for reads */
} BlockDriverAIOCBSync;
3225
3226static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3227{
Kevin Wolfb666d232010-05-05 11:44:39 +02003228 BlockDriverAIOCBSync *acb =
3229 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003230 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003231 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003232 qemu_aio_release(acb);
3233}
3234
/*
 * AIO pool for the emulated asynchronous requests: AIOCBs have the size of
 * BlockDriverAIOCBSync and are cancelled through bdrv_aio_cancel_em().
 */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
3239
bellard83f64092006-08-01 16:21:11 +00003240static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003241{
pbrookce1a14d2006-08-07 02:38:06 +00003242 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003243
aliguorif141eaf2009-04-07 18:43:24 +00003244 if (!acb->is_write)
3245 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003246 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003247 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003248 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003249 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003250 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003251}
bellardbeac80c2006-06-26 20:08:57 +00003252
aliguorif141eaf2009-04-07 18:43:24 +00003253static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3254 int64_t sector_num,
3255 QEMUIOVector *qiov,
3256 int nb_sectors,
3257 BlockDriverCompletionFunc *cb,
3258 void *opaque,
3259 int is_write)
3260
bellardea2384d2004-08-01 21:59:26 +00003261{
pbrookce1a14d2006-08-07 02:38:06 +00003262 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003263
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003264 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003265 acb->is_write = is_write;
3266 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003267 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003268 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003269
3270 if (is_write) {
3271 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003272 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003273 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003274 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003275 }
3276
pbrookce1a14d2006-08-07 02:38:06 +00003277 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003278
pbrookce1a14d2006-08-07 02:38:06 +00003279 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003280}
3281
aliguorif141eaf2009-04-07 18:43:24 +00003282static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3283 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003284 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003285{
aliguorif141eaf2009-04-07 18:43:24 +00003286 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003287}
3288
aliguorif141eaf2009-04-07 18:43:24 +00003289static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3290 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3291 BlockDriverCompletionFunc *cb, void *opaque)
3292{
3293 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3294}
3295
Kevin Wolf68485422011-06-30 10:05:46 +02003296
3297typedef struct BlockDriverAIOCBCoroutine {
3298 BlockDriverAIOCB common;
3299 BlockRequest req;
3300 bool is_write;
3301 QEMUBH* bh;
3302} BlockDriverAIOCBCoroutine;
3303
3304static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3305{
3306 qemu_aio_flush();
3307}
3308
3309static AIOPool bdrv_em_co_aio_pool = {
3310 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3311 .cancel = bdrv_aio_co_cancel_em,
3312};
3313
Paolo Bonzini35246a62011-10-14 10:41:29 +02003314static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003315{
3316 BlockDriverAIOCBCoroutine *acb = opaque;
3317
3318 acb->common.cb(acb->common.opaque, acb->req.error);
3319 qemu_bh_delete(acb->bh);
3320 qemu_aio_release(acb);
3321}
3322
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003323/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3324static void coroutine_fn bdrv_co_do_rw(void *opaque)
3325{
3326 BlockDriverAIOCBCoroutine *acb = opaque;
3327 BlockDriverState *bs = acb->common.bs;
3328
3329 if (!acb->is_write) {
3330 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003331 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003332 } else {
3333 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003334 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003335 }
3336
Paolo Bonzini35246a62011-10-14 10:41:29 +02003337 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003338 qemu_bh_schedule(acb->bh);
3339}
3340
Kevin Wolf68485422011-06-30 10:05:46 +02003341static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3342 int64_t sector_num,
3343 QEMUIOVector *qiov,
3344 int nb_sectors,
3345 BlockDriverCompletionFunc *cb,
3346 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003347 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003348{
3349 Coroutine *co;
3350 BlockDriverAIOCBCoroutine *acb;
3351
3352 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3353 acb->req.sector = sector_num;
3354 acb->req.nb_sectors = nb_sectors;
3355 acb->req.qiov = qiov;
3356 acb->is_write = is_write;
3357
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003358 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003359 qemu_coroutine_enter(co, acb);
3360
3361 return &acb->common;
3362}
3363
Paolo Bonzini07f07612011-10-17 12:32:12 +02003364static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003365{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003366 BlockDriverAIOCBCoroutine *acb = opaque;
3367 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003368
Paolo Bonzini07f07612011-10-17 12:32:12 +02003369 acb->req.error = bdrv_co_flush(bs);
3370 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003371 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003372}
3373
Paolo Bonzini07f07612011-10-17 12:32:12 +02003374BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003375 BlockDriverCompletionFunc *cb, void *opaque)
3376{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003377 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003378
Paolo Bonzini07f07612011-10-17 12:32:12 +02003379 Coroutine *co;
3380 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003381
Paolo Bonzini07f07612011-10-17 12:32:12 +02003382 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3383 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3384 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003385
Alexander Graf016f5cf2010-05-26 17:51:49 +02003386 return &acb->common;
3387}
3388
Paolo Bonzini4265d622011-10-17 12:32:14 +02003389static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3390{
3391 BlockDriverAIOCBCoroutine *acb = opaque;
3392 BlockDriverState *bs = acb->common.bs;
3393
3394 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3395 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3396 qemu_bh_schedule(acb->bh);
3397}
3398
3399BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3400 int64_t sector_num, int nb_sectors,
3401 BlockDriverCompletionFunc *cb, void *opaque)
3402{
3403 Coroutine *co;
3404 BlockDriverAIOCBCoroutine *acb;
3405
3406 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3407
3408 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3409 acb->req.sector = sector_num;
3410 acb->req.nb_sectors = nb_sectors;
3411 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3412 qemu_coroutine_enter(co, acb);
3413
3414 return &acb->common;
3415}
3416
bellardea2384d2004-08-01 21:59:26 +00003417void bdrv_init(void)
3418{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003419 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003420}
pbrookce1a14d2006-08-07 02:38:06 +00003421
Markus Armbrustereb852012009-10-27 18:41:44 +01003422void bdrv_init_with_whitelist(void)
3423{
3424 use_bdrv_whitelist = 1;
3425 bdrv_init();
3426}
3427
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003428void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3429 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003430{
pbrookce1a14d2006-08-07 02:38:06 +00003431 BlockDriverAIOCB *acb;
3432
aliguori6bbff9a2009-03-20 18:25:59 +00003433 if (pool->free_aiocb) {
3434 acb = pool->free_aiocb;
3435 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003436 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003437 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003438 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003439 }
3440 acb->bs = bs;
3441 acb->cb = cb;
3442 acb->opaque = opaque;
3443 return acb;
3444}
3445
3446void qemu_aio_release(void *p)
3447{
aliguori6bbff9a2009-03-20 18:25:59 +00003448 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3449 AIOPool *pool = acb->pool;
3450 acb->next = pool->free_aiocb;
3451 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003452}
bellard19cb3732006-08-19 11:45:59 +00003453
3454/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003455/* Coroutine block device emulation */
3456
3457typedef struct CoroutineIOCompletion {
3458 Coroutine *coroutine;
3459 int ret;
3460} CoroutineIOCompletion;
3461
3462static void bdrv_co_io_em_complete(void *opaque, int ret)
3463{
3464 CoroutineIOCompletion *co = opaque;
3465
3466 co->ret = ret;
3467 qemu_coroutine_enter(co->coroutine, NULL);
3468}
3469
3470static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3471 int nb_sectors, QEMUIOVector *iov,
3472 bool is_write)
3473{
3474 CoroutineIOCompletion co = {
3475 .coroutine = qemu_coroutine_self(),
3476 };
3477 BlockDriverAIOCB *acb;
3478
3479 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003480 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3481 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003482 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003483 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3484 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003485 }
3486
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003487 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003488 if (!acb) {
3489 return -EIO;
3490 }
3491 qemu_coroutine_yield();
3492
3493 return co.ret;
3494}
3495
3496static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3497 int64_t sector_num, int nb_sectors,
3498 QEMUIOVector *iov)
3499{
3500 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3501}
3502
3503static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3504 int64_t sector_num, int nb_sectors,
3505 QEMUIOVector *iov)
3506{
3507 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3508}
3509
Paolo Bonzini07f07612011-10-17 12:32:12 +02003510static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003511{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003512 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003513
Paolo Bonzini07f07612011-10-17 12:32:12 +02003514 rwco->ret = bdrv_co_flush(rwco->bs);
3515}
3516
3517int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3518{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003519 int ret;
3520
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003521 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003522 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003523 }
3524
Kevin Wolfca716362011-11-10 18:13:59 +01003525 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003526 if (bs->drv->bdrv_co_flush_to_os) {
3527 ret = bs->drv->bdrv_co_flush_to_os(bs);
3528 if (ret < 0) {
3529 return ret;
3530 }
3531 }
3532
Kevin Wolfca716362011-11-10 18:13:59 +01003533 /* But don't actually force it to the disk with cache=unsafe */
3534 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3535 return 0;
3536 }
3537
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003538 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003539 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003540 } else if (bs->drv->bdrv_aio_flush) {
3541 BlockDriverAIOCB *acb;
3542 CoroutineIOCompletion co = {
3543 .coroutine = qemu_coroutine_self(),
3544 };
3545
3546 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3547 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003548 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003549 } else {
3550 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003551 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003552 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003553 } else {
3554 /*
3555 * Some block drivers always operate in either writethrough or unsafe
3556 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3557 * know how the server works (because the behaviour is hardcoded or
3558 * depends on server-side configuration), so we can't ensure that
3559 * everything is safe on disk. Returning an error doesn't work because
3560 * that would break guests even if the server operates in writethrough
3561 * mode.
3562 *
3563 * Let's hope the user knows what he's doing.
3564 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003565 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003566 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003567 if (ret < 0) {
3568 return ret;
3569 }
3570
3571 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3572 * in the case of cache=unsafe, so there are no useless flushes.
3573 */
3574 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003575}
3576
Anthony Liguori0f154232011-11-14 15:09:45 -06003577void bdrv_invalidate_cache(BlockDriverState *bs)
3578{
3579 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3580 bs->drv->bdrv_invalidate_cache(bs);
3581 }
3582}
3583
3584void bdrv_invalidate_cache_all(void)
3585{
3586 BlockDriverState *bs;
3587
3588 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3589 bdrv_invalidate_cache(bs);
3590 }
3591}
3592
Paolo Bonzini07f07612011-10-17 12:32:12 +02003593int bdrv_flush(BlockDriverState *bs)
3594{
3595 Coroutine *co;
3596 RwCo rwco = {
3597 .bs = bs,
3598 .ret = NOT_DONE,
3599 };
3600
3601 if (qemu_in_coroutine()) {
3602 /* Fast-path if already in coroutine context */
3603 bdrv_flush_co_entry(&rwco);
3604 } else {
3605 co = qemu_coroutine_create(bdrv_flush_co_entry);
3606 qemu_coroutine_enter(co, &rwco);
3607 while (rwco.ret == NOT_DONE) {
3608 qemu_aio_wait();
3609 }
3610 }
3611
3612 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003613}
3614
Paolo Bonzini4265d622011-10-17 12:32:14 +02003615static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3616{
3617 RwCo *rwco = opaque;
3618
3619 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3620}
3621
3622int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3623 int nb_sectors)
3624{
3625 if (!bs->drv) {
3626 return -ENOMEDIUM;
3627 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3628 return -EIO;
3629 } else if (bs->read_only) {
3630 return -EROFS;
3631 } else if (bs->drv->bdrv_co_discard) {
3632 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3633 } else if (bs->drv->bdrv_aio_discard) {
3634 BlockDriverAIOCB *acb;
3635 CoroutineIOCompletion co = {
3636 .coroutine = qemu_coroutine_self(),
3637 };
3638
3639 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3640 bdrv_co_io_em_complete, &co);
3641 if (acb == NULL) {
3642 return -EIO;
3643 } else {
3644 qemu_coroutine_yield();
3645 return co.ret;
3646 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003647 } else {
3648 return 0;
3649 }
3650}
3651
3652int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3653{
3654 Coroutine *co;
3655 RwCo rwco = {
3656 .bs = bs,
3657 .sector_num = sector_num,
3658 .nb_sectors = nb_sectors,
3659 .ret = NOT_DONE,
3660 };
3661
3662 if (qemu_in_coroutine()) {
3663 /* Fast-path if already in coroutine context */
3664 bdrv_discard_co_entry(&rwco);
3665 } else {
3666 co = qemu_coroutine_create(bdrv_discard_co_entry);
3667 qemu_coroutine_enter(co, &rwco);
3668 while (rwco.ret == NOT_DONE) {
3669 qemu_aio_wait();
3670 }
3671 }
3672
3673 return rwco.ret;
3674}
3675
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003676/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003677/* removable device support */
3678
3679/**
3680 * Return TRUE if the media is present
3681 */
3682int bdrv_is_inserted(BlockDriverState *bs)
3683{
3684 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003685
bellard19cb3732006-08-19 11:45:59 +00003686 if (!drv)
3687 return 0;
3688 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003689 return 1;
3690 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003691}
3692
3693/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003694 * Return whether the media changed since the last call to this
3695 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003696 */
3697int bdrv_media_changed(BlockDriverState *bs)
3698{
3699 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003700
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003701 if (drv && drv->bdrv_media_changed) {
3702 return drv->bdrv_media_changed(bs);
3703 }
3704 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003705}
3706
3707/**
3708 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3709 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003710void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003711{
3712 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003713
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003714 if (drv && drv->bdrv_eject) {
3715 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003716 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003717
3718 if (bs->device_name[0] != '\0') {
3719 bdrv_emit_qmp_eject_event(bs, eject_flag);
3720 }
bellard19cb3732006-08-19 11:45:59 +00003721}
3722
bellard19cb3732006-08-19 11:45:59 +00003723/**
3724 * Lock or unlock the media (if it is locked, the user won't be able
3725 * to eject it manually).
3726 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003727void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003728{
3729 BlockDriver *drv = bs->drv;
3730
Markus Armbruster025e8492011-09-06 18:58:47 +02003731 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003732
Markus Armbruster025e8492011-09-06 18:58:47 +02003733 if (drv && drv->bdrv_lock_medium) {
3734 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003735 }
3736}
ths985a03b2007-12-24 16:10:43 +00003737
3738/* needed for generic scsi interface */
3739
3740int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3741{
3742 BlockDriver *drv = bs->drv;
3743
3744 if (drv && drv->bdrv_ioctl)
3745 return drv->bdrv_ioctl(bs, req, buf);
3746 return -ENOTSUP;
3747}
aliguori7d780662009-03-12 19:57:08 +00003748
aliguori221f7152009-03-28 17:28:41 +00003749BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3750 unsigned long int req, void *buf,
3751 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003752{
aliguori221f7152009-03-28 17:28:41 +00003753 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003754
aliguori221f7152009-03-28 17:28:41 +00003755 if (drv && drv->bdrv_aio_ioctl)
3756 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3757 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003758}
aliguorie268ca52009-04-22 20:20:00 +00003759
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003760void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3761{
3762 bs->buffer_alignment = align;
3763}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003764
aliguorie268ca52009-04-22 20:20:00 +00003765void *qemu_blockalign(BlockDriverState *bs, size_t size)
3766{
3767 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3768}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003769
3770void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3771{
3772 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003773
Liran Schouraaa0eb72010-01-26 10:31:48 +02003774 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003775 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003776 if (!bs->dirty_bitmap) {
3777 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3778 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3779 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003780
Anthony Liguori7267c092011-08-20 22:09:37 -05003781 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003782 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003783 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003784 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003785 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003786 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003787 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003788 }
3789}
3790
3791int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3792{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003793 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003794
Jan Kiszkac6d22832009-11-30 18:21:20 +01003795 if (bs->dirty_bitmap &&
3796 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003797 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3798 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003799 } else {
3800 return 0;
3801 }
3802}
3803
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003804void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3805 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003806{
3807 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3808}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003809
3810int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3811{
3812 return bs->dirty_count;
3813}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003814
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003815void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3816{
3817 assert(bs->in_use != in_use);
3818 bs->in_use = in_use;
3819}
3820
3821int bdrv_in_use(BlockDriverState *bs)
3822{
3823 return bs->in_use;
3824}
3825
Luiz Capitulino28a72822011-09-26 17:43:50 -03003826void bdrv_iostatus_enable(BlockDriverState *bs)
3827{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003828 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003829 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003830}
3831
3832/* The I/O status is only enabled if the drive explicitly
3833 * enables it _and_ the VM is configured to stop on errors */
3834bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3835{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003836 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003837 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3838 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3839 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3840}
3841
3842void bdrv_iostatus_disable(BlockDriverState *bs)
3843{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003844 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003845}
3846
3847void bdrv_iostatus_reset(BlockDriverState *bs)
3848{
3849 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003850 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003851 }
3852}
3853
3854/* XXX: Today this is set by device models because it makes the implementation
3855 quite simple. However, the block layer knows about the error, so it's
3856 possible to implement this without device models being involved */
3857void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3858{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003859 if (bdrv_iostatus_is_enabled(bs) &&
3860 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003861 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003862 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3863 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003864 }
3865}
3866
Christoph Hellwiga597e792011-08-25 08:26:01 +02003867void
3868bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3869 enum BlockAcctType type)
3870{
3871 assert(type < BDRV_MAX_IOTYPE);
3872
3873 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003874 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003875 cookie->type = type;
3876}
3877
3878void
3879bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3880{
3881 assert(cookie->type < BDRV_MAX_IOTYPE);
3882
3883 bs->nr_bytes[cookie->type] += cookie->bytes;
3884 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003885 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003886}
3887
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003888int bdrv_img_create(const char *filename, const char *fmt,
3889 const char *base_filename, const char *base_fmt,
3890 char *options, uint64_t img_size, int flags)
3891{
3892 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003893 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003894 BlockDriverState *bs = NULL;
3895 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003896 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003897 int ret = 0;
3898
3899 /* Find driver and parse its options */
3900 drv = bdrv_find_format(fmt);
3901 if (!drv) {
3902 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003903 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003904 goto out;
3905 }
3906
3907 proto_drv = bdrv_find_protocol(filename);
3908 if (!proto_drv) {
3909 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003910 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003911 goto out;
3912 }
3913
3914 create_options = append_option_parameters(create_options,
3915 drv->create_options);
3916 create_options = append_option_parameters(create_options,
3917 proto_drv->create_options);
3918
3919 /* Create parameter list with default values */
3920 param = parse_option_parameters("", create_options, param);
3921
3922 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3923
3924 /* Parse -o options */
3925 if (options) {
3926 param = parse_option_parameters(options, create_options, param);
3927 if (param == NULL) {
3928 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003929 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003930 goto out;
3931 }
3932 }
3933
3934 if (base_filename) {
3935 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3936 base_filename)) {
3937 error_report("Backing file not supported for file format '%s'",
3938 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003939 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003940 goto out;
3941 }
3942 }
3943
3944 if (base_fmt) {
3945 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3946 error_report("Backing file format not supported for file "
3947 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003948 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003949 goto out;
3950 }
3951 }
3952
Jes Sorensen792da932010-12-16 13:52:17 +01003953 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3954 if (backing_file && backing_file->value.s) {
3955 if (!strcmp(filename, backing_file->value.s)) {
3956 error_report("Error: Trying to create an image with the "
3957 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003958 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003959 goto out;
3960 }
3961 }
3962
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003963 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3964 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003965 backing_drv = bdrv_find_format(backing_fmt->value.s);
3966 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003967 error_report("Unknown backing file format '%s'",
3968 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003969 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003970 goto out;
3971 }
3972 }
3973
3974 // The size for the image must always be specified, with one exception:
3975 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003976 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3977 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003978 if (backing_file && backing_file->value.s) {
3979 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003980 char buf[32];
3981
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003982 bs = bdrv_new("");
3983
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003984 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003985 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003986 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003987 goto out;
3988 }
3989 bdrv_get_geometry(bs, &size);
3990 size *= 512;
3991
3992 snprintf(buf, sizeof(buf), "%" PRId64, size);
3993 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3994 } else {
3995 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003996 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003997 goto out;
3998 }
3999 }
4000
4001 printf("Formatting '%s', fmt=%s ", filename, fmt);
4002 print_option_parameters(param);
4003 puts("");
4004
4005 ret = bdrv_create(drv, filename, param);
4006
4007 if (ret < 0) {
4008 if (ret == -ENOTSUP) {
4009 error_report("Formatting or formatting option not supported for "
4010 "file format '%s'", fmt);
4011 } else if (ret == -EFBIG) {
4012 error_report("The image size is too large for file format '%s'",
4013 fmt);
4014 } else {
4015 error_report("%s: error while creating %s: %s", filename, fmt,
4016 strerror(-ret));
4017 }
4018 }
4019
4020out:
4021 free_option_parameters(create_options);
4022 free_option_parameters(param);
4023
4024 if (bs) {
4025 bdrv_delete(bs);
4026 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004027
4028 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004029}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004030
4031void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
4032 BlockDriverCompletionFunc *cb, void *opaque)
4033{
4034 BlockJob *job;
4035
4036 if (bs->job || bdrv_in_use(bs)) {
4037 return NULL;
4038 }
4039 bdrv_set_in_use(bs, 1);
4040
4041 job = g_malloc0(job_type->instance_size);
4042 job->job_type = job_type;
4043 job->bs = bs;
4044 job->cb = cb;
4045 job->opaque = opaque;
4046 bs->job = job;
4047 return job;
4048}
4049
4050void block_job_complete(BlockJob *job, int ret)
4051{
4052 BlockDriverState *bs = job->bs;
4053
4054 assert(bs->job == job);
4055 job->cb(job->opaque, ret);
4056 bs->job = NULL;
4057 g_free(job);
4058 bdrv_set_in_use(bs, 0);
4059}
4060
4061int block_job_set_speed(BlockJob *job, int64_t value)
4062{
4063 if (!job->job_type->set_speed) {
4064 return -ENOTSUP;
4065 }
4066 return job->job_type->set_speed(job, value);
4067}
4068
4069void block_job_cancel(BlockJob *job)
4070{
4071 job->cancelled = true;
4072}
4073
4074bool block_job_is_cancelled(BlockJob *job)
4075{
4076 return job->cancelled;
4077}