blob: 9b3cb756465ca235958a5edf886175f1142a792b [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020051static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000052static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000054 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000055static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000057 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020058static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010064static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010066static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010068static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010074 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000076
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080077static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010084static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000086
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010087static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000089
Markus Armbrusterf9092b12010-06-25 10:33:39 +020090/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
Markus Armbrustereb852012009-10-27 18:41:44 +010093/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* filename[1] is deliberately not read: filename may be "" */
        return 0;
    }
    return filename[1] == ':';
}
103
/*
 * Return 1 if filename names a Windows drive: either exactly a drive
 * letter plus ':' with nothing after it, or a string starting with the
 * device prefix "\\.\" or "//./".  Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* filename[2] is only read once the two-character prefix matched */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
/*
 * Check whether path starts with "<protocol>:".
 *
 * Returns 1 if path contains a ':' anywhere, 0 otherwise.  On Windows,
 * drive names and paths with a drive-letter prefix are never treated as
 * having a protocol.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
201
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * If path contains a ':', only the part after the first ':' is
 * examined, so "proto:/x" counts as absolute while "/a:b" does not.
 * On Windows a leading '/' or '\\' is accepted up front, and '\\' is
 * accepted as a separator after the ':' as well.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    /* skip everything up to and including the first ':' */
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/*
 * Build the path of filename relative to base_path in dest (a buffer of
 * dest_size bytes).
 *
 * If filename is absolute according to path_is_absolute() it is copied
 * unchanged.  Otherwise dest receives base_path up to and including
 * whichever comes later of its first ':' and its last path separator,
 * followed by filename.  URL-style base paths are handled through the
 * ':' rule.  Nothing is written when dest_size <= 0; the copied prefix
 * is clamped to dest_size - 1 bytes.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *dir_end;
    const char *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* candidate 1: just past the first ':' of base_path */
    dir_end = strchr(base_path, ':');
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* candidate 2: just past the last path separator of base_path */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever candidate lies further into base_path */
    if (after_sep > dir_end) {
        dir_end = after_sep;
    }

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
bellardd5249392004-08-03 21:14:23 +0000356#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000357void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000358{
bellard3b9f94e2007-01-07 17:27:07 +0000359 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000360
bellard3b9f94e2007-01-07 17:27:07 +0000361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000363}
364#else
/*
 * POSIX variant: create an empty, uniquely named temporary file and
 * store its path in filename (a buffer of size bytes).
 *
 * The file is created as "<dir>/vl.XXXXXX" via mkstemp(), where <dir>
 * is $TMPDIR or "/tmp" when TMPDIR is unset.  The descriptor returned
 * by mkstemp() is closed again right away; callers reopen the file by
 * name.  If mkstemp() fails nothing is closed, so errno still describes
 * the mkstemp() failure when this function returns.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    /* XXX: race condition possible */
    fd = mkstemp(filename);
    if (fd >= 0) {
        close(fd);
    }
}
bellardd5249392004-08-03 21:14:23 +0000377#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200557/*
Kevin Wolf57915332010-04-14 15:24:50 +0200558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200569 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100570 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200571 bs->encrypted = 0;
572 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100573 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200574 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100575 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->buffer_alignment = 512;
577
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
Kevin Wolf57915332010-04-14 15:24:50 +0200583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100584 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500591 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200592
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200602 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
Kevin Wolf57915332010-04-14 15:24:50 +0200620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200627 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100628
Kevin Wolf57915332010-04-14 15:24:50 +0200629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500641 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
647/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
bellard83f64092006-08-01 16:21:11 +0000650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000651{
bellard83f64092006-08-01 16:21:11 +0000652 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200653 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000654 int ret;
655
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900656 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200657 if (!drv) {
658 return -ENOENT;
659 }
660
bellard83f64092006-08-01 16:21:11 +0000661 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200662 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000666 }
aliguori71d07702009-03-03 17:37:16 +0000667 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000668 *pbs = bs;
669 return 0;
bellardea2384d2004-08-01 21:59:26 +0000670}
bellardfc01f7e2003-06-30 10:03:06 +0000671
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000677{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200678 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200679 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000680
bellard83f64092006-08-01 16:21:11 +0000681 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000682 BlockDriverState *bs1;
683 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000684 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200687 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000688
bellardea2384d2004-08-01 21:59:26 +0000689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
691
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200694 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000695 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000696 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000697 return ret;
bellardea2384d2004-08-01 21:59:26 +0000698 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
bellardea2384d2004-08-01 21:59:26 +0000704 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000705
bellardea2384d2004-08-01 21:59:26 +0000706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000712 else if (!realpath(filename, backing_filename))
713 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000714
Kevin Wolf91a073a2009-05-27 14:48:06 +0200715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
Jes Sorensen3e829902010-05-27 16:20:30 +0200718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200726 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000727 if (ret < 0) {
728 return ret;
bellardea2384d2004-08-01 21:59:26 +0000729 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200730
bellardea2384d2004-08-01 21:59:26 +0000731 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200732 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000733 bs->is_temporary = 1;
734 }
bellard712e7872005-04-28 21:09:32 +0000735
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200736 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200737 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200738 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000739 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100740
aliguori51d7c002009-03-05 23:00:29 +0000741 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000742 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000743 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100748 goto unlink_and_fail;
749 }
750
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200768 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000769 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200789 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200790 }
791
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
bellardfc01f7e2003-06-30 10:03:06 +0000806void bdrv_close(BlockDriverState *bs)
807{
bellard19cb3732006-08-19 11:45:59 +0000808 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100812 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000813 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100814 bs->backing_hd = NULL;
815 }
bellardea2384d2004-08-01 21:59:26 +0000816 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500817 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
bellard67b915a2004-03-31 23:37:16 +0000822#endif
bellardea2384d2004-08-01 21:59:26 +0000823 bs->opaque = NULL;
824 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000825 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000826
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000832 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
bellardb3380822004-03-14 21:38:54 +0000838}
839
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
Ryan Harperd22b2f42011-03-29 20:51:47 -0500849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
bellardb3380822004-03-14 21:38:54 +0000859void bdrv_delete(BlockDriverState *bs)
860{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200861 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200862
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100863 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500864 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000865
bellardb3380822004-03-14 21:38:54 +0000866 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200871 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500872 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000873}
874
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200877{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200878 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200879 return -EBUSY;
880 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200881 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300882 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200883 return 0;
884}
885
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200888{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200901 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200902}
903
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200906{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200907 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200908}
909
Markus Armbruster0e49de52011-08-03 15:07:41 +0200910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200918}
919
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200921{
Markus Armbruster145feb12011-08-03 15:07:42 +0200922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200924 }
925}
926
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
Markus Armbrustere4def802011-09-06 18:58:53 +0200939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
Markus Armbruster145feb12011-08-03 15:07:42 +0200947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200951 }
952}
953
Markus Armbrusterf1076392011-09-06 18:58:46 +0200954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
aliguorie97fc192009-04-21 23:11:50 +0000962/*
963 * Run consistency checks on an image
964 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200965 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200966 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200967 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000968 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
Kevin Wolfe076f332010-06-29 11:43:13 +0200975 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200976 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000977}
978
Kevin Wolf8a426612010-07-16 17:17:01 +0200979#define COMMIT_BUF_SECTORS 2048
980
bellard33e39632003-07-06 17:15:21 +0000981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
bellard19cb3732006-08-19 11:45:59 +0000984 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +0200985 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +0200986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200988 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +0200989 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +0000992
bellard19cb3732006-08-19 11:45:59 +0000993 if (!drv)
994 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +0000998 }
999
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
Kevin Wolfee181192010-08-05 13:05:22 +02001003
1004 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001032 }
bellardea2384d2004-08-01 21:59:26 +00001033
Jan Kiszka6ea44302009-11-30 18:21:19 +01001034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001036
Kevin Wolf8a426612010-07-16 17:17:01 +02001037 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
bellardea2384d2004-08-01 21:59:26 +00001049 }
1050 }
bellard95389c82005-12-18 18:28:15 +00001051
Christoph Hellwig1d449522010-01-17 12:32:30 +01001052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
bellard95389c82005-12-18 18:28:15 +00001056
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001063
1064ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
Christoph Hellwig1d449522010-01-17 12:32:30 +01001084 return ret;
bellard33e39632003-07-06 17:15:21 +00001085}
1086
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001096struct BdrvTrackedRequest {
1097 BlockDriverState *bs;
1098 int64_t sector_num;
1099 int nb_sectors;
1100 bool is_write;
1101 QLIST_ENTRY(BdrvTrackedRequest) list;
1102};
1103
1104/**
1105 * Remove an active request from the tracked requests list
1106 *
1107 * This function should be called when a tracked request is completing.
1108 */
1109static void tracked_request_end(BdrvTrackedRequest *req)
1110{
1111 QLIST_REMOVE(req, list);
1112}
1113
1114/**
1115 * Add an active request to the tracked requests list
1116 */
1117static void tracked_request_begin(BdrvTrackedRequest *req,
1118 BlockDriverState *bs,
1119 int64_t sector_num,
1120 int nb_sectors, bool is_write)
1121{
1122 *req = (BdrvTrackedRequest){
1123 .bs = bs,
1124 .sector_num = sector_num,
1125 .nb_sectors = nb_sectors,
1126 .is_write = is_write,
1127 };
1128
1129 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1130}
1131
Kevin Wolf756e6732010-01-12 12:55:17 +01001132/*
1133 * Return values:
1134 * 0 - success
1135 * -EINVAL - backing format specified, but no file
1136 * -ENOSPC - can't update the backing file because no space is left in the
1137 * image file header
1138 * -ENOTSUP - format driver doesn't support changing the backing file
1139 */
1140int bdrv_change_backing_file(BlockDriverState *bs,
1141 const char *backing_file, const char *backing_fmt)
1142{
1143 BlockDriver *drv = bs->drv;
1144
1145 if (drv->bdrv_change_backing_file != NULL) {
1146 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1147 } else {
1148 return -ENOTSUP;
1149 }
1150}
1151
aliguori71d07702009-03-03 17:37:16 +00001152static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1153 size_t size)
1154{
1155 int64_t len;
1156
1157 if (!bdrv_is_inserted(bs))
1158 return -ENOMEDIUM;
1159
1160 if (bs->growable)
1161 return 0;
1162
1163 len = bdrv_getlength(bs);
1164
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001165 if (offset < 0)
1166 return -EIO;
1167
1168 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001169 return -EIO;
1170
1171 return 0;
1172}
1173
1174static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1175 int nb_sectors)
1176{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001177 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1178 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001179}
1180
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001181typedef struct RwCo {
1182 BlockDriverState *bs;
1183 int64_t sector_num;
1184 int nb_sectors;
1185 QEMUIOVector *qiov;
1186 bool is_write;
1187 int ret;
1188} RwCo;
1189
1190static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1191{
1192 RwCo *rwco = opaque;
1193
1194 if (!rwco->is_write) {
1195 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1196 rwco->nb_sectors, rwco->qiov);
1197 } else {
1198 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1199 rwco->nb_sectors, rwco->qiov);
1200 }
1201}
1202
1203/*
1204 * Process a synchronous request using coroutines
1205 */
1206static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1207 int nb_sectors, bool is_write)
1208{
1209 QEMUIOVector qiov;
1210 struct iovec iov = {
1211 .iov_base = (void *)buf,
1212 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1213 };
1214 Coroutine *co;
1215 RwCo rwco = {
1216 .bs = bs,
1217 .sector_num = sector_num,
1218 .nb_sectors = nb_sectors,
1219 .qiov = &qiov,
1220 .is_write = is_write,
1221 .ret = NOT_DONE,
1222 };
1223
1224 qemu_iovec_init_external(&qiov, &iov, 1);
1225
1226 if (qemu_in_coroutine()) {
1227 /* Fast-path if already in coroutine context */
1228 bdrv_rw_co_entry(&rwco);
1229 } else {
1230 co = qemu_coroutine_create(bdrv_rw_co_entry);
1231 qemu_coroutine_enter(co, &rwco);
1232 while (rwco.ret == NOT_DONE) {
1233 qemu_aio_wait();
1234 }
1235 }
1236 return rwco.ret;
1237}
1238
bellard19cb3732006-08-19 11:45:59 +00001239/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001240int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001241 uint8_t *buf, int nb_sectors)
1242{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001243 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001244}
1245
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001246static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001247 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001248{
1249 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001250 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001251
Jan Kiszka6ea44302009-11-30 18:21:19 +01001252 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001253 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001254
1255 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001256 idx = start / (sizeof(unsigned long) * 8);
1257 bit = start % (sizeof(unsigned long) * 8);
1258 val = bs->dirty_bitmap[idx];
1259 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001260 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001261 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001262 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001263 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001264 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001265 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001266 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001267 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001268 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001269 }
1270 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001271 }
1272}
1273
ths5fafdf22007-09-16 21:08:06 +00001274/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001275 -EIO generic I/O error (may happen for all errors)
1276 -ENOMEDIUM No media inserted.
1277 -EINVAL Invalid sector number or nb_sectors
1278 -EACCES Trying to write a read-only device
1279*/
ths5fafdf22007-09-16 21:08:06 +00001280int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001281 const uint8_t *buf, int nb_sectors)
1282{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001283 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001284}
1285
aliguorieda578e2009-03-12 19:57:16 +00001286int bdrv_pread(BlockDriverState *bs, int64_t offset,
1287 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001288{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001289 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001290 int len, nb_sectors, count;
1291 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001292 int ret;
bellard83f64092006-08-01 16:21:11 +00001293
1294 count = count1;
1295 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001296 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001297 if (len > count)
1298 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001299 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001300 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001301 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1302 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001303 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001304 count -= len;
1305 if (count == 0)
1306 return count1;
1307 sector_num++;
1308 buf += len;
1309 }
1310
1311 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001312 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001313 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001314 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1315 return ret;
bellard83f64092006-08-01 16:21:11 +00001316 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001317 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001318 buf += len;
1319 count -= len;
1320 }
1321
1322 /* add data from the last sector */
1323 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001324 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1325 return ret;
bellard83f64092006-08-01 16:21:11 +00001326 memcpy(buf, tmp_buf, count);
1327 }
1328 return count1;
1329}
1330
aliguorieda578e2009-03-12 19:57:16 +00001331int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1332 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001333{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001334 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001335 int len, nb_sectors, count;
1336 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001337 int ret;
bellard83f64092006-08-01 16:21:11 +00001338
1339 count = count1;
1340 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001341 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001342 if (len > count)
1343 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001344 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001345 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001346 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1347 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001348 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001349 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1350 return ret;
bellard83f64092006-08-01 16:21:11 +00001351 count -= len;
1352 if (count == 0)
1353 return count1;
1354 sector_num++;
1355 buf += len;
1356 }
1357
1358 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001359 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001360 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001361 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1362 return ret;
bellard83f64092006-08-01 16:21:11 +00001363 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001364 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001365 buf += len;
1366 count -= len;
1367 }
1368
1369 /* add data from the last sector */
1370 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001371 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1372 return ret;
bellard83f64092006-08-01 16:21:11 +00001373 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001374 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1375 return ret;
bellard83f64092006-08-01 16:21:11 +00001376 }
1377 return count1;
1378}
bellard83f64092006-08-01 16:21:11 +00001379
Kevin Wolff08145f2010-06-16 16:38:15 +02001380/*
1381 * Writes to the file and ensures that no writes are reordered across this
1382 * request (acts as a barrier)
1383 *
1384 * Returns 0 on success, -errno in error cases.
1385 */
1386int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1387 const void *buf, int count)
1388{
1389 int ret;
1390
1391 ret = bdrv_pwrite(bs, offset, buf, count);
1392 if (ret < 0) {
1393 return ret;
1394 }
1395
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001396 /* No flush needed for cache modes that use O_DSYNC */
1397 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001398 bdrv_flush(bs);
1399 }
1400
1401 return 0;
1402}
1403
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001404/*
1405 * Handle a read request in coroutine context
1406 */
1407static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1408 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001409{
1410 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001411 BdrvTrackedRequest req;
1412 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001413
Kevin Wolfda1fa912011-07-14 17:27:13 +02001414 if (!drv) {
1415 return -ENOMEDIUM;
1416 }
1417 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1418 return -EIO;
1419 }
1420
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001421 /* throttling disk read I/O */
1422 if (bs->io_limits_enabled) {
1423 bdrv_io_limits_intercept(bs, false, nb_sectors);
1424 }
1425
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001426 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
1427 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1428 tracked_request_end(&req);
1429 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001430}
1431
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001432int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001433 int nb_sectors, QEMUIOVector *qiov)
1434{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001435 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001436
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001437 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1438}
1439
1440/*
1441 * Handle a write request in coroutine context
1442 */
1443static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1444 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1445{
1446 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001447 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001448 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001449
1450 if (!bs->drv) {
1451 return -ENOMEDIUM;
1452 }
1453 if (bs->read_only) {
1454 return -EACCES;
1455 }
1456 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1457 return -EIO;
1458 }
1459
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001460 /* throttling disk write I/O */
1461 if (bs->io_limits_enabled) {
1462 bdrv_io_limits_intercept(bs, true, nb_sectors);
1463 }
1464
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001465 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1466
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001467 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1468
Kevin Wolfda1fa912011-07-14 17:27:13 +02001469 if (bs->dirty_bitmap) {
1470 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1471 }
1472
1473 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1474 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1475 }
1476
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001477 tracked_request_end(&req);
1478
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001479 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001480}
1481
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001482int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1483 int nb_sectors, QEMUIOVector *qiov)
1484{
1485 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1486
1487 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1488}
1489
bellard83f64092006-08-01 16:21:11 +00001490/**
bellard83f64092006-08-01 16:21:11 +00001491 * Truncate file to 'offset' bytes (needed only for file protocols)
1492 */
1493int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1494{
1495 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001496 int ret;
bellard83f64092006-08-01 16:21:11 +00001497 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001498 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001499 if (!drv->bdrv_truncate)
1500 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001501 if (bs->read_only)
1502 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001503 if (bdrv_in_use(bs))
1504 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001505 ret = drv->bdrv_truncate(bs, offset);
1506 if (ret == 0) {
1507 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001508 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001509 }
1510 return ret;
bellard83f64092006-08-01 16:21:11 +00001511}
1512
1513/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001514 * Length of a allocated file in bytes. Sparse files are counted by actual
1515 * allocated space. Return < 0 if error or unknown.
1516 */
1517int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1518{
1519 BlockDriver *drv = bs->drv;
1520 if (!drv) {
1521 return -ENOMEDIUM;
1522 }
1523 if (drv->bdrv_get_allocated_file_size) {
1524 return drv->bdrv_get_allocated_file_size(bs);
1525 }
1526 if (bs->file) {
1527 return bdrv_get_allocated_file_size(bs->file);
1528 }
1529 return -ENOTSUP;
1530}
1531
1532/**
bellard83f64092006-08-01 16:21:11 +00001533 * Length of a file in bytes. Return < 0 if error or unknown.
1534 */
1535int64_t bdrv_getlength(BlockDriverState *bs)
1536{
1537 BlockDriver *drv = bs->drv;
1538 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001539 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001540
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001541 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001542 if (drv->bdrv_getlength) {
1543 return drv->bdrv_getlength(bs);
1544 }
bellard83f64092006-08-01 16:21:11 +00001545 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001546 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001547}
1548
bellard19cb3732006-08-19 11:45:59 +00001549/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001550void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001551{
bellard19cb3732006-08-19 11:45:59 +00001552 int64_t length;
1553 length = bdrv_getlength(bs);
1554 if (length < 0)
1555 length = 0;
1556 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001557 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001558 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001559}
bellardcf989512004-02-16 21:56:36 +00001560
/*
 * One 16-byte entry of the MSDOS partition table, as read from the first
 * sector by guess_disk_lchs().  Packed so the fields sit at their on-disk
 * offsets; the 32-bit fields are little-endian on disk.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001573
1574/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1575static int guess_disk_lchs(BlockDriverState *bs,
1576 int *pcylinders, int *pheads, int *psectors)
1577{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001578 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001579 int ret, i, heads, sectors, cylinders;
1580 struct partition *p;
1581 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001582 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001583
1584 bdrv_get_geometry(bs, &nb_sectors);
1585
1586 ret = bdrv_read(bs, 0, buf, 1);
1587 if (ret < 0)
1588 return -1;
1589 /* test msdos magic */
1590 if (buf[510] != 0x55 || buf[511] != 0xaa)
1591 return -1;
1592 for(i = 0; i < 4; i++) {
1593 p = ((struct partition *)(buf + 0x1be)) + i;
1594 nr_sects = le32_to_cpu(p->nr_sects);
1595 if (nr_sects && p->end_head) {
1596 /* We make the assumption that the partition terminates on
1597 a cylinder boundary */
1598 heads = p->end_head + 1;
1599 sectors = p->end_sector & 63;
1600 if (sectors == 0)
1601 continue;
1602 cylinders = nb_sectors / (heads * sectors);
1603 if (cylinders < 1 || cylinders > 16383)
1604 continue;
1605 *pheads = heads;
1606 *psectors = sectors;
1607 *pcylinders = cylinders;
1608#if 0
1609 printf("guessed geometry: LCHS=%d %d %d\n",
1610 cylinders, heads, sectors);
1611#endif
1612 return 0;
1613 }
1614 }
1615 return -1;
1616}
1617
1618void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1619{
1620 int translation, lba_detected = 0;
1621 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001622 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001623
1624 /* if a geometry hint is available, use it */
1625 bdrv_get_geometry(bs, &nb_sectors);
1626 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1627 translation = bdrv_get_translation_hint(bs);
1628 if (cylinders != 0) {
1629 *pcyls = cylinders;
1630 *pheads = heads;
1631 *psecs = secs;
1632 } else {
1633 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1634 if (heads > 16) {
1635 /* if heads > 16, it means that a BIOS LBA
1636 translation was active, so the default
1637 hardware geometry is OK */
1638 lba_detected = 1;
1639 goto default_geometry;
1640 } else {
1641 *pcyls = cylinders;
1642 *pheads = heads;
1643 *psecs = secs;
1644 /* disable any translation to be in sync with
1645 the logical geometry */
1646 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1647 bdrv_set_translation_hint(bs,
1648 BIOS_ATA_TRANSLATION_NONE);
1649 }
1650 }
1651 } else {
1652 default_geometry:
1653 /* if no geometry, use a standard physical disk geometry */
1654 cylinders = nb_sectors / (16 * 63);
1655
1656 if (cylinders > 16383)
1657 cylinders = 16383;
1658 else if (cylinders < 2)
1659 cylinders = 2;
1660 *pcyls = cylinders;
1661 *pheads = 16;
1662 *psecs = 63;
1663 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1664 if ((*pcyls * *pheads) <= 131072) {
1665 bdrv_set_translation_hint(bs,
1666 BIOS_ATA_TRANSLATION_LARGE);
1667 } else {
1668 bdrv_set_translation_hint(bs,
1669 BIOS_ATA_TRANSLATION_LBA);
1670 }
1671 }
1672 }
1673 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1674 }
1675}
1676
ths5fafdf22007-09-16 21:08:06 +00001677void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001678 int cyls, int heads, int secs)
1679{
1680 bs->cyls = cyls;
1681 bs->heads = heads;
1682 bs->secs = secs;
1683}
1684
bellard46d47672004-11-16 01:45:27 +00001685void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1686{
1687 bs->translation = translation;
1688}
1689
ths5fafdf22007-09-16 21:08:06 +00001690void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001691 int *pcyls, int *pheads, int *psecs)
1692{
1693 *pcyls = bs->cyls;
1694 *pheads = bs->heads;
1695 *psecs = bs->secs;
1696}
1697
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001698/* throttling disk io limits */
1699void bdrv_set_io_limits(BlockDriverState *bs,
1700 BlockIOLimit *io_limits)
1701{
1702 bs->io_limits = *io_limits;
1703 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1704}
1705
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001706/* Recognize floppy formats */
1707typedef struct FDFormat {
1708 FDriveType drive;
1709 uint8_t last_sect;
1710 uint8_t max_track;
1711 uint8_t max_head;
1712} FDFormat;
1713
1714static const FDFormat fd_formats[] = {
1715 /* First entry is default format */
1716 /* 1.44 MB 3"1/2 floppy disks */
1717 { FDRIVE_DRV_144, 18, 80, 1, },
1718 { FDRIVE_DRV_144, 20, 80, 1, },
1719 { FDRIVE_DRV_144, 21, 80, 1, },
1720 { FDRIVE_DRV_144, 21, 82, 1, },
1721 { FDRIVE_DRV_144, 21, 83, 1, },
1722 { FDRIVE_DRV_144, 22, 80, 1, },
1723 { FDRIVE_DRV_144, 23, 80, 1, },
1724 { FDRIVE_DRV_144, 24, 80, 1, },
1725 /* 2.88 MB 3"1/2 floppy disks */
1726 { FDRIVE_DRV_288, 36, 80, 1, },
1727 { FDRIVE_DRV_288, 39, 80, 1, },
1728 { FDRIVE_DRV_288, 40, 80, 1, },
1729 { FDRIVE_DRV_288, 44, 80, 1, },
1730 { FDRIVE_DRV_288, 48, 80, 1, },
1731 /* 720 kB 3"1/2 floppy disks */
1732 { FDRIVE_DRV_144, 9, 80, 1, },
1733 { FDRIVE_DRV_144, 10, 80, 1, },
1734 { FDRIVE_DRV_144, 10, 82, 1, },
1735 { FDRIVE_DRV_144, 10, 83, 1, },
1736 { FDRIVE_DRV_144, 13, 80, 1, },
1737 { FDRIVE_DRV_144, 14, 80, 1, },
1738 /* 1.2 MB 5"1/4 floppy disks */
1739 { FDRIVE_DRV_120, 15, 80, 1, },
1740 { FDRIVE_DRV_120, 18, 80, 1, },
1741 { FDRIVE_DRV_120, 18, 82, 1, },
1742 { FDRIVE_DRV_120, 18, 83, 1, },
1743 { FDRIVE_DRV_120, 20, 80, 1, },
1744 /* 720 kB 5"1/4 floppy disks */
1745 { FDRIVE_DRV_120, 9, 80, 1, },
1746 { FDRIVE_DRV_120, 11, 80, 1, },
1747 /* 360 kB 5"1/4 floppy disks */
1748 { FDRIVE_DRV_120, 9, 40, 1, },
1749 { FDRIVE_DRV_120, 9, 40, 0, },
1750 { FDRIVE_DRV_120, 10, 41, 1, },
1751 { FDRIVE_DRV_120, 10, 42, 1, },
1752 /* 320 kB 5"1/4 floppy disks */
1753 { FDRIVE_DRV_120, 8, 40, 1, },
1754 { FDRIVE_DRV_120, 8, 40, 0, },
1755 /* 360 kB must match 5"1/4 better than 3"1/2... */
1756 { FDRIVE_DRV_144, 9, 80, 0, },
1757 /* end */
1758 { FDRIVE_DRV_NONE, -1, -1, 0, },
1759};
1760
1761void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1762 int *max_track, int *last_sect,
1763 FDriveType drive_in, FDriveType *drive)
1764{
1765 const FDFormat *parse;
1766 uint64_t nb_sectors, size;
1767 int i, first_match, match;
1768
1769 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1770 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1771 /* User defined disk */
1772 } else {
1773 bdrv_get_geometry(bs, &nb_sectors);
1774 match = -1;
1775 first_match = -1;
1776 for (i = 0; ; i++) {
1777 parse = &fd_formats[i];
1778 if (parse->drive == FDRIVE_DRV_NONE) {
1779 break;
1780 }
1781 if (drive_in == parse->drive ||
1782 drive_in == FDRIVE_DRV_NONE) {
1783 size = (parse->max_head + 1) * parse->max_track *
1784 parse->last_sect;
1785 if (nb_sectors == size) {
1786 match = i;
1787 break;
1788 }
1789 if (first_match == -1) {
1790 first_match = i;
1791 }
1792 }
1793 }
1794 if (match == -1) {
1795 if (first_match == -1) {
1796 match = 1;
1797 } else {
1798 match = first_match;
1799 }
1800 parse = &fd_formats[match];
1801 }
1802 *nb_heads = parse->max_head + 1;
1803 *max_track = parse->max_track;
1804 *last_sect = parse->last_sect;
1805 *drive = parse->drive;
1806 }
1807}
1808
bellard46d47672004-11-16 01:45:27 +00001809int bdrv_get_translation_hint(BlockDriverState *bs)
1810{
1811 return bs->translation;
1812}
1813
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001814void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1815 BlockErrorAction on_write_error)
1816{
1817 bs->on_read_error = on_read_error;
1818 bs->on_write_error = on_write_error;
1819}
1820
1821BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1822{
1823 return is_read ? bs->on_read_error : bs->on_write_error;
1824}
1825
bellardb3380822004-03-14 21:38:54 +00001826int bdrv_is_read_only(BlockDriverState *bs)
1827{
1828 return bs->read_only;
1829}
1830
ths985a03b2007-12-24 16:10:43 +00001831int bdrv_is_sg(BlockDriverState *bs)
1832{
1833 return bs->sg;
1834}
1835
Christoph Hellwige900a7b2009-09-04 19:01:15 +02001836int bdrv_enable_write_cache(BlockDriverState *bs)
1837{
1838 return bs->enable_write_cache;
1839}
1840
bellardea2384d2004-08-01 21:59:26 +00001841int bdrv_is_encrypted(BlockDriverState *bs)
1842{
1843 if (bs->backing_hd && bs->backing_hd->encrypted)
1844 return 1;
1845 return bs->encrypted;
1846}
1847
aliguoric0f4ce72009-03-05 23:01:01 +00001848int bdrv_key_required(BlockDriverState *bs)
1849{
1850 BlockDriverState *backing_hd = bs->backing_hd;
1851
1852 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1853 return 1;
1854 return (bs->encrypted && !bs->valid_key);
1855}
1856
bellardea2384d2004-08-01 21:59:26 +00001857int bdrv_set_key(BlockDriverState *bs, const char *key)
1858{
1859 int ret;
1860 if (bs->backing_hd && bs->backing_hd->encrypted) {
1861 ret = bdrv_set_key(bs->backing_hd, key);
1862 if (ret < 0)
1863 return ret;
1864 if (!bs->encrypted)
1865 return 0;
1866 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02001867 if (!bs->encrypted) {
1868 return -EINVAL;
1869 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1870 return -ENOMEDIUM;
1871 }
aliguoric0f4ce72009-03-05 23:01:01 +00001872 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00001873 if (ret < 0) {
1874 bs->valid_key = 0;
1875 } else if (!bs->valid_key) {
1876 bs->valid_key = 1;
1877 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001878 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00001879 }
aliguoric0f4ce72009-03-05 23:01:01 +00001880 return ret;
bellardea2384d2004-08-01 21:59:26 +00001881}
1882
1883void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1884{
bellard19cb3732006-08-19 11:45:59 +00001885 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00001886 buf[0] = '\0';
1887 } else {
1888 pstrcpy(buf, buf_size, bs->drv->format_name);
1889 }
1890}
1891
ths5fafdf22007-09-16 21:08:06 +00001892void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00001893 void *opaque)
1894{
1895 BlockDriver *drv;
1896
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01001897 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00001898 it(opaque, drv->format_name);
1899 }
1900}
1901
bellardb3380822004-03-14 21:38:54 +00001902BlockDriverState *bdrv_find(const char *name)
1903{
1904 BlockDriverState *bs;
1905
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001906 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1907 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00001908 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001909 }
bellardb3380822004-03-14 21:38:54 +00001910 }
1911 return NULL;
1912}
1913
Markus Armbruster2f399b02010-06-02 18:55:20 +02001914BlockDriverState *bdrv_next(BlockDriverState *bs)
1915{
1916 if (!bs) {
1917 return QTAILQ_FIRST(&bdrv_states);
1918 }
1919 return QTAILQ_NEXT(bs, list);
1920}
1921
aliguori51de9762009-03-05 23:00:43 +00001922void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00001923{
1924 BlockDriverState *bs;
1925
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001926 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00001927 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00001928 }
1929}
1930
bellardea2384d2004-08-01 21:59:26 +00001931const char *bdrv_get_device_name(BlockDriverState *bs)
1932{
1933 return bs->device_name;
1934}
1935
aliguoric6ca28d2008-10-06 13:55:43 +00001936void bdrv_flush_all(void)
1937{
1938 BlockDriverState *bs;
1939
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001940 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02001941 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00001942 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001943 }
1944 }
aliguoric6ca28d2008-10-06 13:55:43 +00001945}
1946
Kevin Wolff2feebb2010-04-14 17:30:35 +02001947int bdrv_has_zero_init(BlockDriverState *bs)
1948{
1949 assert(bs->drv);
1950
Kevin Wolf336c1c12010-07-28 11:26:29 +02001951 if (bs->drv->bdrv_has_zero_init) {
1952 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02001953 }
1954
1955 return 1;
1956}
1957
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00001958typedef struct BdrvCoIsAllocatedData {
1959 BlockDriverState *bs;
1960 int64_t sector_num;
1961 int nb_sectors;
1962 int *pnum;
1963 int ret;
1964 bool done;
1965} BdrvCoIsAllocatedData;
1966
thsf58c7b32008-06-05 21:53:49 +00001967/*
1968 * Returns true iff the specified sector is present in the disk image. Drivers
1969 * not implementing the functionality are assumed to not support backing files,
1970 * hence all their sectors are reported as allocated.
1971 *
1972 * 'pnum' is set to the number of sectors (including and immediately following
1973 * the specified sector) that are known to be in the same
1974 * allocated/unallocated state.
1975 *
1976 * 'nb_sectors' is the max value 'pnum' should be set to.
1977 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00001978int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
1979 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00001980{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00001981 if (!bs->drv->bdrv_co_is_allocated) {
1982 int64_t n;
thsf58c7b32008-06-05 21:53:49 +00001983 if (sector_num >= bs->total_sectors) {
1984 *pnum = 0;
1985 return 0;
1986 }
1987 n = bs->total_sectors - sector_num;
1988 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1989 return 1;
1990 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00001991
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00001992 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
1993}
1994
1995/* Coroutine wrapper for bdrv_is_allocated() */
1996static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
1997{
1998 BdrvCoIsAllocatedData *data = opaque;
1999 BlockDriverState *bs = data->bs;
2000
2001 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2002 data->pnum);
2003 data->done = true;
2004}
2005
2006/*
2007 * Synchronous wrapper around bdrv_co_is_allocated().
2008 *
2009 * See bdrv_co_is_allocated() for details.
2010 */
2011int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2012 int *pnum)
2013{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002014 Coroutine *co;
2015 BdrvCoIsAllocatedData data = {
2016 .bs = bs,
2017 .sector_num = sector_num,
2018 .nb_sectors = nb_sectors,
2019 .pnum = pnum,
2020 .done = false,
2021 };
2022
2023 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2024 qemu_coroutine_enter(co, &data);
2025 while (!data.done) {
2026 qemu_aio_wait();
2027 }
2028 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002029}
2030
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002031void bdrv_mon_event(const BlockDriverState *bdrv,
2032 BlockMonEventAction action, int is_read)
2033{
2034 QObject *data;
2035 const char *action_str;
2036
2037 switch (action) {
2038 case BDRV_ACTION_REPORT:
2039 action_str = "report";
2040 break;
2041 case BDRV_ACTION_IGNORE:
2042 action_str = "ignore";
2043 break;
2044 case BDRV_ACTION_STOP:
2045 action_str = "stop";
2046 break;
2047 default:
2048 abort();
2049 }
2050
2051 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2052 bdrv->device_name,
2053 action_str,
2054 is_read ? "read" : "write");
2055 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2056
2057 qobject_decref(data);
2058}
2059
Luiz Capitulinob2023812011-09-21 17:16:47 -03002060BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002061{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002062 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002063 BlockDriverState *bs;
2064
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002065 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002066 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002067
Luiz Capitulinob2023812011-09-21 17:16:47 -03002068 info->value = g_malloc0(sizeof(*info->value));
2069 info->value->device = g_strdup(bs->device_name);
2070 info->value->type = g_strdup("unknown");
2071 info->value->locked = bdrv_dev_is_medium_locked(bs);
2072 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002073
Markus Armbrustere4def802011-09-06 18:58:53 +02002074 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002075 info->value->has_tray_open = true;
2076 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002077 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002078
2079 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002080 info->value->has_io_status = true;
2081 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002082 }
2083
bellard19cb3732006-08-19 11:45:59 +00002084 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002085 info->value->has_inserted = true;
2086 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2087 info->value->inserted->file = g_strdup(bs->filename);
2088 info->value->inserted->ro = bs->read_only;
2089 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2090 info->value->inserted->encrypted = bs->encrypted;
2091 if (bs->backing_file[0]) {
2092 info->value->inserted->has_backing_file = true;
2093 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002094 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002095
2096 if (bs->io_limits_enabled) {
2097 info->value->inserted->bps =
2098 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2099 info->value->inserted->bps_rd =
2100 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2101 info->value->inserted->bps_wr =
2102 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2103 info->value->inserted->iops =
2104 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2105 info->value->inserted->iops_rd =
2106 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2107 info->value->inserted->iops_wr =
2108 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2109 }
bellardb3380822004-03-14 21:38:54 +00002110 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002111
2112 /* XXX: waiting for the qapi to support GSList */
2113 if (!cur_item) {
2114 head = cur_item = info;
2115 } else {
2116 cur_item->next = info;
2117 cur_item = info;
2118 }
bellardb3380822004-03-14 21:38:54 +00002119 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002120
Luiz Capitulinob2023812011-09-21 17:16:47 -03002121 return head;
bellardb3380822004-03-14 21:38:54 +00002122}
thsa36e69d2007-12-02 05:18:19 +00002123
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002124/* Consider exposing this as a full fledged QMP command */
2125static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002126{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002127 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002128
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002129 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002130
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002131 if (bs->device_name[0]) {
2132 s->has_device = true;
2133 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002134 }
2135
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002136 s->stats = g_malloc0(sizeof(*s->stats));
2137 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2138 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2139 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2140 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2141 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2142 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2143 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2144 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2145 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2146
Kevin Wolf294cc352010-04-28 14:34:01 +02002147 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002148 s->has_parent = true;
2149 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002150 }
2151
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002152 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002153}
2154
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002155BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002156{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002157 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002158 BlockDriverState *bs;
2159
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002160 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002161 BlockStatsList *info = g_malloc0(sizeof(*info));
2162 info->value = qmp_query_blockstat(bs, NULL);
2163
2164 /* XXX: waiting for the qapi to support GSList */
2165 if (!cur_item) {
2166 head = cur_item = info;
2167 } else {
2168 cur_item->next = info;
2169 cur_item = info;
2170 }
thsa36e69d2007-12-02 05:18:19 +00002171 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002172
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002173 return head;
thsa36e69d2007-12-02 05:18:19 +00002174}
bellardea2384d2004-08-01 21:59:26 +00002175
aliguori045df332009-03-05 23:00:48 +00002176const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2177{
2178 if (bs->backing_hd && bs->backing_hd->encrypted)
2179 return bs->backing_file;
2180 else if (bs->encrypted)
2181 return bs->filename;
2182 else
2183 return NULL;
2184}
2185
ths5fafdf22007-09-16 21:08:06 +00002186void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002187 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002188{
Kevin Wolf3574c602011-10-26 11:02:11 +02002189 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002190}
2191
ths5fafdf22007-09-16 21:08:06 +00002192int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002193 const uint8_t *buf, int nb_sectors)
2194{
2195 BlockDriver *drv = bs->drv;
2196 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002197 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002198 if (!drv->bdrv_write_compressed)
2199 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002200 if (bdrv_check_request(bs, sector_num, nb_sectors))
2201 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002202
Jan Kiszkac6d22832009-11-30 18:21:20 +01002203 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002204 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2205 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002206
bellardfaea38e2006-08-05 21:31:00 +00002207 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2208}
ths3b46e622007-09-17 08:09:54 +00002209
bellardfaea38e2006-08-05 21:31:00 +00002210int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2211{
2212 BlockDriver *drv = bs->drv;
2213 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002214 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002215 if (!drv->bdrv_get_info)
2216 return -ENOTSUP;
2217 memset(bdi, 0, sizeof(*bdi));
2218 return drv->bdrv_get_info(bs, bdi);
2219}
2220
Christoph Hellwig45566e92009-07-10 23:11:57 +02002221int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2222 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002223{
2224 BlockDriver *drv = bs->drv;
2225 if (!drv)
2226 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002227 if (drv->bdrv_save_vmstate)
2228 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2229 if (bs->file)
2230 return bdrv_save_vmstate(bs->file, buf, pos, size);
2231 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002232}
2233
Christoph Hellwig45566e92009-07-10 23:11:57 +02002234int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2235 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002236{
2237 BlockDriver *drv = bs->drv;
2238 if (!drv)
2239 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002240 if (drv->bdrv_load_vmstate)
2241 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2242 if (bs->file)
2243 return bdrv_load_vmstate(bs->file, buf, pos, size);
2244 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002245}
2246
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002247void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2248{
2249 BlockDriver *drv = bs->drv;
2250
2251 if (!drv || !drv->bdrv_debug_event) {
2252 return;
2253 }
2254
2255 return drv->bdrv_debug_event(bs, event);
2256
2257}
2258
/**************************************************************/
/* handling of snapshots */
2261
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002262int bdrv_can_snapshot(BlockDriverState *bs)
2263{
2264 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002265 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002266 return 0;
2267 }
2268
2269 if (!drv->bdrv_snapshot_create) {
2270 if (bs->file != NULL) {
2271 return bdrv_can_snapshot(bs->file);
2272 }
2273 return 0;
2274 }
2275
2276 return 1;
2277}
2278
Blue Swirl199630b2010-07-25 20:49:34 +00002279int bdrv_is_snapshot(BlockDriverState *bs)
2280{
2281 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2282}
2283
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002284BlockDriverState *bdrv_snapshots(void)
2285{
2286 BlockDriverState *bs;
2287
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002288 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002289 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002290 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002291
2292 bs = NULL;
2293 while ((bs = bdrv_next(bs))) {
2294 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002295 bs_snapshots = bs;
2296 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002297 }
2298 }
2299 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002300}
2301
ths5fafdf22007-09-16 21:08:06 +00002302int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002303 QEMUSnapshotInfo *sn_info)
2304{
2305 BlockDriver *drv = bs->drv;
2306 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002307 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002308 if (drv->bdrv_snapshot_create)
2309 return drv->bdrv_snapshot_create(bs, sn_info);
2310 if (bs->file)
2311 return bdrv_snapshot_create(bs->file, sn_info);
2312 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002313}
2314
ths5fafdf22007-09-16 21:08:06 +00002315int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002316 const char *snapshot_id)
2317{
2318 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002319 int ret, open_ret;
2320
bellardfaea38e2006-08-05 21:31:00 +00002321 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002322 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002323 if (drv->bdrv_snapshot_goto)
2324 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2325
2326 if (bs->file) {
2327 drv->bdrv_close(bs);
2328 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2329 open_ret = drv->bdrv_open(bs, bs->open_flags);
2330 if (open_ret < 0) {
2331 bdrv_delete(bs->file);
2332 bs->drv = NULL;
2333 return open_ret;
2334 }
2335 return ret;
2336 }
2337
2338 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002339}
2340
2341int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2342{
2343 BlockDriver *drv = bs->drv;
2344 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002345 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002346 if (drv->bdrv_snapshot_delete)
2347 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2348 if (bs->file)
2349 return bdrv_snapshot_delete(bs->file, snapshot_id);
2350 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002351}
2352
ths5fafdf22007-09-16 21:08:06 +00002353int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002354 QEMUSnapshotInfo **psn_info)
2355{
2356 BlockDriver *drv = bs->drv;
2357 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002358 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002359 if (drv->bdrv_snapshot_list)
2360 return drv->bdrv_snapshot_list(bs, psn_info);
2361 if (bs->file)
2362 return bdrv_snapshot_list(bs->file, psn_info);
2363 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002364}
2365
edison51ef6722010-09-21 19:58:41 -07002366int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2367 const char *snapshot_name)
2368{
2369 BlockDriver *drv = bs->drv;
2370 if (!drv) {
2371 return -ENOMEDIUM;
2372 }
2373 if (!bs->read_only) {
2374 return -EINVAL;
2375 }
2376 if (drv->bdrv_snapshot_load_tmp) {
2377 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2378 }
2379 return -ENOTSUP;
2380}
2381
#define NB_SUFFIXES 4

/*
 * Format 'size' into buf (at most buf_size bytes) and return buf.
 *
 * Values up to 999 are printed as a plain integer.  Larger values are
 * scaled by successive powers of 1024 and get a K, M, G or T suffix:
 * one decimal is shown while the scaled value is below 10, a rounded
 * integer otherwise.  T is used for everything beyond its range.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2411
2412char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2413{
2414 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002415#ifdef _WIN32
2416 struct tm *ptm;
2417#else
bellardfaea38e2006-08-05 21:31:00 +00002418 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002419#endif
bellardfaea38e2006-08-05 21:31:00 +00002420 time_t ti;
2421 int64_t secs;
2422
2423 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002424 snprintf(buf, buf_size,
2425 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002426 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2427 } else {
2428 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002429#ifdef _WIN32
2430 ptm = localtime(&ti);
2431 strftime(date_buf, sizeof(date_buf),
2432 "%Y-%m-%d %H:%M:%S", ptm);
2433#else
bellardfaea38e2006-08-05 21:31:00 +00002434 localtime_r(&ti, &tm);
2435 strftime(date_buf, sizeof(date_buf),
2436 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002437#endif
bellardfaea38e2006-08-05 21:31:00 +00002438 secs = sn->vm_clock_nsec / 1000000000;
2439 snprintf(clock_buf, sizeof(clock_buf),
2440 "%02d:%02d:%02d.%03d",
2441 (int)(secs / 3600),
2442 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002443 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002444 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2445 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002446 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002447 sn->id_str, sn->name,
2448 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2449 date_buf,
2450 clock_buf);
2451 }
2452 return buf;
2453}
2454
/**************************************************************/
/* async I/Os */
2457
aliguori3b69e4b2009-01-22 16:59:24 +00002458BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002459 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002460 BlockDriverCompletionFunc *cb, void *opaque)
2461{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002462 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2463
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002464 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002465 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002466}
2467
aliguorif141eaf2009-04-07 18:43:24 +00002468BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2469 QEMUIOVector *qiov, int nb_sectors,
2470 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002471{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002472 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2473
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002474 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002475 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002476}
2477
Kevin Wolf40b4f532009-09-09 17:53:37 +02002478
2479typedef struct MultiwriteCB {
2480 int error;
2481 int num_requests;
2482 int num_callbacks;
2483 struct {
2484 BlockDriverCompletionFunc *cb;
2485 void *opaque;
2486 QEMUIOVector *free_qiov;
2487 void *free_buf;
2488 } callbacks[];
2489} MultiwriteCB;
2490
2491static void multiwrite_user_cb(MultiwriteCB *mcb)
2492{
2493 int i;
2494
2495 for (i = 0; i < mcb->num_callbacks; i++) {
2496 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002497 if (mcb->callbacks[i].free_qiov) {
2498 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2499 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002500 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002501 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002502 }
2503}
2504
2505static void multiwrite_cb(void *opaque, int ret)
2506{
2507 MultiwriteCB *mcb = opaque;
2508
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002509 trace_multiwrite_cb(mcb, ret);
2510
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002511 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002512 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002513 }
2514
2515 mcb->num_requests--;
2516 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002517 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002518 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002519 }
2520}
2521
2522static int multiwrite_req_compare(const void *a, const void *b)
2523{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002524 const BlockRequest *req1 = a, *req2 = b;
2525
2526 /*
2527 * Note that we can't simply subtract req2->sector from req1->sector
2528 * here as that could overflow the return value.
2529 */
2530 if (req1->sector > req2->sector) {
2531 return 1;
2532 } else if (req1->sector < req2->sector) {
2533 return -1;
2534 } else {
2535 return 0;
2536 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002537}
2538
2539/*
2540 * Takes a bunch of requests and tries to merge them. Returns the number of
2541 * requests that remain after merging.
2542 */
2543static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2544 int num_reqs, MultiwriteCB *mcb)
2545{
2546 int i, outidx;
2547
2548 // Sort requests by start sector
2549 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2550
2551 // Check if adjacent requests touch the same clusters. If so, combine them,
2552 // filling up gaps with zero sectors.
2553 outidx = 0;
2554 for (i = 1; i < num_reqs; i++) {
2555 int merge = 0;
2556 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2557
2558 // This handles the cases that are valid for all block drivers, namely
2559 // exactly sequential writes and overlapping writes.
2560 if (reqs[i].sector <= oldreq_last) {
2561 merge = 1;
2562 }
2563
2564 // The block driver may decide that it makes sense to combine requests
2565 // even if there is a gap of some sectors between them. In this case,
2566 // the gap is filled with zeros (therefore only applicable for yet
2567 // unused space in format like qcow2).
2568 if (!merge && bs->drv->bdrv_merge_requests) {
2569 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2570 }
2571
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002572 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2573 merge = 0;
2574 }
2575
Kevin Wolf40b4f532009-09-09 17:53:37 +02002576 if (merge) {
2577 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002578 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002579 qemu_iovec_init(qiov,
2580 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2581
2582 // Add the first request to the merged one. If the requests are
2583 // overlapping, drop the last sectors of the first request.
2584 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2585 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2586
2587 // We might need to add some zeros between the two requests
2588 if (reqs[i].sector > oldreq_last) {
2589 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2590 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2591 memset(buf, 0, zero_bytes);
2592 qemu_iovec_add(qiov, buf, zero_bytes);
2593 mcb->callbacks[i].free_buf = buf;
2594 }
2595
2596 // Add the second request
2597 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2598
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002599 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002600 reqs[outidx].qiov = qiov;
2601
2602 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2603 } else {
2604 outidx++;
2605 reqs[outidx].sector = reqs[i].sector;
2606 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2607 reqs[outidx].qiov = reqs[i].qiov;
2608 }
2609 }
2610
2611 return outidx + 1;
2612}
2613
2614/*
2615 * Submit multiple AIO write requests at once.
2616 *
2617 * On success, the function returns 0 and all requests in the reqs array have
2618 * been submitted. In error case this function returns -1, and any of the
2619 * requests may or may not be submitted yet. In particular, this means that the
2620 * callback will be called for some of the requests, for others it won't. The
2621 * caller must check the error field of the BlockRequest to wait for the right
2622 * callbacks (if error != 0, no callback will be called).
2623 *
2624 * The implementation may modify the contents of the reqs array, e.g. to merge
2625 * requests. However, the fields opaque and error are left unmodified as they
2626 * are used to signal failure for a single request to the caller.
2627 */
2628int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2629{
2630 BlockDriverAIOCB *acb;
2631 MultiwriteCB *mcb;
2632 int i;
2633
Ryan Harper301db7c2011-03-07 10:01:04 -06002634 /* don't submit writes if we don't have a medium */
2635 if (bs->drv == NULL) {
2636 for (i = 0; i < num_reqs; i++) {
2637 reqs[i].error = -ENOMEDIUM;
2638 }
2639 return -1;
2640 }
2641
Kevin Wolf40b4f532009-09-09 17:53:37 +02002642 if (num_reqs == 0) {
2643 return 0;
2644 }
2645
2646 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002647 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002648 mcb->num_requests = 0;
2649 mcb->num_callbacks = num_reqs;
2650
2651 for (i = 0; i < num_reqs; i++) {
2652 mcb->callbacks[i].cb = reqs[i].cb;
2653 mcb->callbacks[i].opaque = reqs[i].opaque;
2654 }
2655
2656 // Check for mergable requests
2657 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2658
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002659 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2660
Kevin Wolf453f9a12010-07-02 14:01:21 +02002661 /*
2662 * Run the aio requests. As soon as one request can't be submitted
2663 * successfully, fail all requests that are not yet submitted (we must
2664 * return failure for all requests anyway)
2665 *
2666 * num_requests cannot be set to the right value immediately: If
2667 * bdrv_aio_writev fails for some request, num_requests would be too high
2668 * and therefore multiwrite_cb() would never recognize the multiwrite
2669 * request as completed. We also cannot use the loop variable i to set it
2670 * when the first request fails because the callback may already have been
2671 * called for previously submitted requests. Thus, num_requests must be
2672 * incremented for each request that is submitted.
2673 *
2674 * The problem that callbacks may be called early also means that we need
2675 * to take care that num_requests doesn't become 0 before all requests are
2676 * submitted - multiwrite_cb() would consider the multiwrite request
2677 * completed. A dummy request that is "completed" by a manual call to
2678 * multiwrite_cb() takes care of this.
2679 */
2680 mcb->num_requests = 1;
2681
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002682 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002683 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002684 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002685 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2686 reqs[i].nb_sectors, multiwrite_cb, mcb);
2687
2688 if (acb == NULL) {
2689 // We can only fail the whole thing if no request has been
2690 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2691 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002692 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002693 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002694 goto fail;
2695 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002696 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002697 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002698 break;
2699 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002700 }
2701 }
2702
Kevin Wolf453f9a12010-07-02 14:01:21 +02002703 /* Complete the dummy request */
2704 multiwrite_cb(mcb, 0);
2705
Kevin Wolf40b4f532009-09-09 17:53:37 +02002706 return 0;
2707
2708fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002709 for (i = 0; i < mcb->num_callbacks; i++) {
2710 reqs[i].error = -EIO;
2711 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002712 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002713 return -1;
2714}
2715
bellard83f64092006-08-01 16:21:11 +00002716void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002717{
aliguori6bbff9a2009-03-20 18:25:59 +00002718 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002719}
2720
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002721/* block I/O throttling */
2722static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2723 bool is_write, double elapsed_time, uint64_t *wait)
2724{
2725 uint64_t bps_limit = 0;
2726 double bytes_limit, bytes_base, bytes_res;
2727 double slice_time, wait_time;
2728
2729 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2730 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2731 } else if (bs->io_limits.bps[is_write]) {
2732 bps_limit = bs->io_limits.bps[is_write];
2733 } else {
2734 if (wait) {
2735 *wait = 0;
2736 }
2737
2738 return false;
2739 }
2740
2741 slice_time = bs->slice_end - bs->slice_start;
2742 slice_time /= (NANOSECONDS_PER_SECOND);
2743 bytes_limit = bps_limit * slice_time;
2744 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2745 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2746 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2747 }
2748
2749 /* bytes_base: the bytes of data which have been read/written; and
2750 * it is obtained from the history statistic info.
2751 * bytes_res: the remaining bytes of data which need to be read/written.
2752 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2753 * the total time for completing reading/writting all data.
2754 */
2755 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2756
2757 if (bytes_base + bytes_res <= bytes_limit) {
2758 if (wait) {
2759 *wait = 0;
2760 }
2761
2762 return false;
2763 }
2764
2765 /* Calc approx time to dispatch */
2766 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2767
2768 /* When the I/O rate at runtime exceeds the limits,
2769 * bs->slice_end need to be extended in order that the current statistic
2770 * info can be kept until the timer fire, so it is increased and tuned
2771 * based on the result of experiment.
2772 */
2773 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2774 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2775 if (wait) {
2776 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2777 }
2778
2779 return true;
2780}
2781
2782static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2783 double elapsed_time, uint64_t *wait)
2784{
2785 uint64_t iops_limit = 0;
2786 double ios_limit, ios_base;
2787 double slice_time, wait_time;
2788
2789 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2790 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2791 } else if (bs->io_limits.iops[is_write]) {
2792 iops_limit = bs->io_limits.iops[is_write];
2793 } else {
2794 if (wait) {
2795 *wait = 0;
2796 }
2797
2798 return false;
2799 }
2800
2801 slice_time = bs->slice_end - bs->slice_start;
2802 slice_time /= (NANOSECONDS_PER_SECOND);
2803 ios_limit = iops_limit * slice_time;
2804 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2805 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2806 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2807 }
2808
2809 if (ios_base + 1 <= ios_limit) {
2810 if (wait) {
2811 *wait = 0;
2812 }
2813
2814 return false;
2815 }
2816
2817 /* Calc approx time to dispatch */
2818 wait_time = (ios_base + 1) / iops_limit;
2819 if (wait_time > elapsed_time) {
2820 wait_time = wait_time - elapsed_time;
2821 } else {
2822 wait_time = 0;
2823 }
2824
2825 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2826 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2827 if (wait) {
2828 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2829 }
2830
2831 return true;
2832}
2833
2834static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2835 bool is_write, int64_t *wait)
2836{
2837 int64_t now, max_wait;
2838 uint64_t bps_wait = 0, iops_wait = 0;
2839 double elapsed_time;
2840 int bps_ret, iops_ret;
2841
2842 now = qemu_get_clock_ns(vm_clock);
2843 if ((bs->slice_start < now)
2844 && (bs->slice_end > now)) {
2845 bs->slice_end = now + bs->slice_time;
2846 } else {
2847 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
2848 bs->slice_start = now;
2849 bs->slice_end = now + bs->slice_time;
2850
2851 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
2852 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
2853
2854 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
2855 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
2856 }
2857
2858 elapsed_time = now - bs->slice_start;
2859 elapsed_time /= (NANOSECONDS_PER_SECOND);
2860
2861 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
2862 is_write, elapsed_time, &bps_wait);
2863 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
2864 elapsed_time, &iops_wait);
2865 if (bps_ret || iops_ret) {
2866 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
2867 if (wait) {
2868 *wait = max_wait;
2869 }
2870
2871 now = qemu_get_clock_ns(vm_clock);
2872 if (bs->slice_end < now + max_wait) {
2873 bs->slice_end = now + max_wait;
2874 }
2875
2876 return true;
2877 }
2878
2879 if (wait) {
2880 *wait = 0;
2881 }
2882
2883 return false;
2884}
pbrookce1a14d2006-08-07 02:38:06 +00002885
bellard83f64092006-08-01 16:21:11 +00002886/**************************************************************/
2887/* async block device emulation */
2888
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002889typedef struct BlockDriverAIOCBSync {
2890 BlockDriverAIOCB common;
2891 QEMUBH *bh;
2892 int ret;
2893 /* vector translation state */
2894 QEMUIOVector *qiov;
2895 uint8_t *bounce;
2896 int is_write;
2897} BlockDriverAIOCBSync;
2898
2899static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2900{
Kevin Wolfb666d232010-05-05 11:44:39 +02002901 BlockDriverAIOCBSync *acb =
2902 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03002903 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03002904 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002905 qemu_aio_release(acb);
2906}
2907
2908static AIOPool bdrv_em_aio_pool = {
2909 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2910 .cancel = bdrv_aio_cancel_em,
2911};
2912
bellard83f64092006-08-01 16:21:11 +00002913static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00002914{
pbrookce1a14d2006-08-07 02:38:06 +00002915 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00002916
aliguorif141eaf2009-04-07 18:43:24 +00002917 if (!acb->is_write)
2918 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00002919 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00002920 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03002921 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03002922 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00002923 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00002924}
bellardbeac80c2006-06-26 20:08:57 +00002925
aliguorif141eaf2009-04-07 18:43:24 +00002926static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2927 int64_t sector_num,
2928 QEMUIOVector *qiov,
2929 int nb_sectors,
2930 BlockDriverCompletionFunc *cb,
2931 void *opaque,
2932 int is_write)
2933
bellardea2384d2004-08-01 21:59:26 +00002934{
pbrookce1a14d2006-08-07 02:38:06 +00002935 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00002936
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002937 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00002938 acb->is_write = is_write;
2939 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00002940 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00002941
pbrookce1a14d2006-08-07 02:38:06 +00002942 if (!acb->bh)
2943 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00002944
2945 if (is_write) {
2946 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01002947 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00002948 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01002949 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00002950 }
2951
pbrookce1a14d2006-08-07 02:38:06 +00002952 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00002953
pbrookce1a14d2006-08-07 02:38:06 +00002954 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00002955}
2956
aliguorif141eaf2009-04-07 18:43:24 +00002957static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2958 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00002959 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002960{
aliguorif141eaf2009-04-07 18:43:24 +00002961 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00002962}
2963
aliguorif141eaf2009-04-07 18:43:24 +00002964static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2965 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2966 BlockDriverCompletionFunc *cb, void *opaque)
2967{
2968 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2969}
2970
Kevin Wolf68485422011-06-30 10:05:46 +02002971
2972typedef struct BlockDriverAIOCBCoroutine {
2973 BlockDriverAIOCB common;
2974 BlockRequest req;
2975 bool is_write;
2976 QEMUBH* bh;
2977} BlockDriverAIOCBCoroutine;
2978
2979static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2980{
2981 qemu_aio_flush();
2982}
2983
2984static AIOPool bdrv_em_co_aio_pool = {
2985 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2986 .cancel = bdrv_aio_co_cancel_em,
2987};
2988
Paolo Bonzini35246a62011-10-14 10:41:29 +02002989static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02002990{
2991 BlockDriverAIOCBCoroutine *acb = opaque;
2992
2993 acb->common.cb(acb->common.opaque, acb->req.error);
2994 qemu_bh_delete(acb->bh);
2995 qemu_aio_release(acb);
2996}
2997
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002998/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2999static void coroutine_fn bdrv_co_do_rw(void *opaque)
3000{
3001 BlockDriverAIOCBCoroutine *acb = opaque;
3002 BlockDriverState *bs = acb->common.bs;
3003
3004 if (!acb->is_write) {
3005 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3006 acb->req.nb_sectors, acb->req.qiov);
3007 } else {
3008 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3009 acb->req.nb_sectors, acb->req.qiov);
3010 }
3011
Paolo Bonzini35246a62011-10-14 10:41:29 +02003012 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003013 qemu_bh_schedule(acb->bh);
3014}
3015
Kevin Wolf68485422011-06-30 10:05:46 +02003016static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3017 int64_t sector_num,
3018 QEMUIOVector *qiov,
3019 int nb_sectors,
3020 BlockDriverCompletionFunc *cb,
3021 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003022 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003023{
3024 Coroutine *co;
3025 BlockDriverAIOCBCoroutine *acb;
3026
3027 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3028 acb->req.sector = sector_num;
3029 acb->req.nb_sectors = nb_sectors;
3030 acb->req.qiov = qiov;
3031 acb->is_write = is_write;
3032
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003033 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003034 qemu_coroutine_enter(co, acb);
3035
3036 return &acb->common;
3037}
3038
Paolo Bonzini07f07612011-10-17 12:32:12 +02003039static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003040{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003041 BlockDriverAIOCBCoroutine *acb = opaque;
3042 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003043
Paolo Bonzini07f07612011-10-17 12:32:12 +02003044 acb->req.error = bdrv_co_flush(bs);
3045 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003046 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003047}
3048
Paolo Bonzini07f07612011-10-17 12:32:12 +02003049BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003050 BlockDriverCompletionFunc *cb, void *opaque)
3051{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003052 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003053
Paolo Bonzini07f07612011-10-17 12:32:12 +02003054 Coroutine *co;
3055 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003056
Paolo Bonzini07f07612011-10-17 12:32:12 +02003057 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3058 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3059 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003060
Alexander Graf016f5cf2010-05-26 17:51:49 +02003061 return &acb->common;
3062}
3063
Paolo Bonzini4265d622011-10-17 12:32:14 +02003064static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3065{
3066 BlockDriverAIOCBCoroutine *acb = opaque;
3067 BlockDriverState *bs = acb->common.bs;
3068
3069 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3070 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3071 qemu_bh_schedule(acb->bh);
3072}
3073
3074BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3075 int64_t sector_num, int nb_sectors,
3076 BlockDriverCompletionFunc *cb, void *opaque)
3077{
3078 Coroutine *co;
3079 BlockDriverAIOCBCoroutine *acb;
3080
3081 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3082
3083 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3084 acb->req.sector = sector_num;
3085 acb->req.nb_sectors = nb_sectors;
3086 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3087 qemu_coroutine_enter(co, acb);
3088
3089 return &acb->common;
3090}
3091
bellardea2384d2004-08-01 21:59:26 +00003092void bdrv_init(void)
3093{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003094 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003095}
pbrookce1a14d2006-08-07 02:38:06 +00003096
Markus Armbrustereb852012009-10-27 18:41:44 +01003097void bdrv_init_with_whitelist(void)
3098{
3099 use_bdrv_whitelist = 1;
3100 bdrv_init();
3101}
3102
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003103void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3104 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003105{
pbrookce1a14d2006-08-07 02:38:06 +00003106 BlockDriverAIOCB *acb;
3107
aliguori6bbff9a2009-03-20 18:25:59 +00003108 if (pool->free_aiocb) {
3109 acb = pool->free_aiocb;
3110 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003111 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003112 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003113 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003114 }
3115 acb->bs = bs;
3116 acb->cb = cb;
3117 acb->opaque = opaque;
3118 return acb;
3119}
3120
3121void qemu_aio_release(void *p)
3122{
aliguori6bbff9a2009-03-20 18:25:59 +00003123 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3124 AIOPool *pool = acb->pool;
3125 acb->next = pool->free_aiocb;
3126 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003127}
bellard19cb3732006-08-19 11:45:59 +00003128
3129/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003130/* Coroutine block device emulation */
3131
3132typedef struct CoroutineIOCompletion {
3133 Coroutine *coroutine;
3134 int ret;
3135} CoroutineIOCompletion;
3136
3137static void bdrv_co_io_em_complete(void *opaque, int ret)
3138{
3139 CoroutineIOCompletion *co = opaque;
3140
3141 co->ret = ret;
3142 qemu_coroutine_enter(co->coroutine, NULL);
3143}
3144
3145static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3146 int nb_sectors, QEMUIOVector *iov,
3147 bool is_write)
3148{
3149 CoroutineIOCompletion co = {
3150 .coroutine = qemu_coroutine_self(),
3151 };
3152 BlockDriverAIOCB *acb;
3153
3154 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003155 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3156 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003157 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003158 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3159 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003160 }
3161
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003162 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003163 if (!acb) {
3164 return -EIO;
3165 }
3166 qemu_coroutine_yield();
3167
3168 return co.ret;
3169}
3170
3171static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3172 int64_t sector_num, int nb_sectors,
3173 QEMUIOVector *iov)
3174{
3175 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3176}
3177
3178static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3179 int64_t sector_num, int nb_sectors,
3180 QEMUIOVector *iov)
3181{
3182 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3183}
3184
Paolo Bonzini07f07612011-10-17 12:32:12 +02003185static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003186{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003187 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003188
Paolo Bonzini07f07612011-10-17 12:32:12 +02003189 rwco->ret = bdrv_co_flush(rwco->bs);
3190}
3191
3192int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3193{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003194 int ret;
3195
Kevin Wolfca716362011-11-10 18:13:59 +01003196 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003197 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003198 }
3199
Kevin Wolfca716362011-11-10 18:13:59 +01003200 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003201 if (bs->drv->bdrv_co_flush_to_os) {
3202 ret = bs->drv->bdrv_co_flush_to_os(bs);
3203 if (ret < 0) {
3204 return ret;
3205 }
3206 }
3207
Kevin Wolfca716362011-11-10 18:13:59 +01003208 /* But don't actually force it to the disk with cache=unsafe */
3209 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3210 return 0;
3211 }
3212
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003213 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003214 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003215 } else if (bs->drv->bdrv_aio_flush) {
3216 BlockDriverAIOCB *acb;
3217 CoroutineIOCompletion co = {
3218 .coroutine = qemu_coroutine_self(),
3219 };
3220
3221 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3222 if (acb == NULL) {
3223 return -EIO;
3224 } else {
3225 qemu_coroutine_yield();
3226 return co.ret;
3227 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003228 } else {
3229 /*
3230 * Some block drivers always operate in either writethrough or unsafe
3231 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3232 * know how the server works (because the behaviour is hardcoded or
3233 * depends on server-side configuration), so we can't ensure that
3234 * everything is safe on disk. Returning an error doesn't work because
3235 * that would break guests even if the server operates in writethrough
3236 * mode.
3237 *
3238 * Let's hope the user knows what he's doing.
3239 */
3240 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003241 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003242}
3243
Anthony Liguori0f154232011-11-14 15:09:45 -06003244void bdrv_invalidate_cache(BlockDriverState *bs)
3245{
3246 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3247 bs->drv->bdrv_invalidate_cache(bs);
3248 }
3249}
3250
3251void bdrv_invalidate_cache_all(void)
3252{
3253 BlockDriverState *bs;
3254
3255 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3256 bdrv_invalidate_cache(bs);
3257 }
3258}
3259
Paolo Bonzini07f07612011-10-17 12:32:12 +02003260int bdrv_flush(BlockDriverState *bs)
3261{
3262 Coroutine *co;
3263 RwCo rwco = {
3264 .bs = bs,
3265 .ret = NOT_DONE,
3266 };
3267
3268 if (qemu_in_coroutine()) {
3269 /* Fast-path if already in coroutine context */
3270 bdrv_flush_co_entry(&rwco);
3271 } else {
3272 co = qemu_coroutine_create(bdrv_flush_co_entry);
3273 qemu_coroutine_enter(co, &rwco);
3274 while (rwco.ret == NOT_DONE) {
3275 qemu_aio_wait();
3276 }
3277 }
3278
3279 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003280}
3281
Paolo Bonzini4265d622011-10-17 12:32:14 +02003282static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3283{
3284 RwCo *rwco = opaque;
3285
3286 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3287}
3288
3289int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3290 int nb_sectors)
3291{
3292 if (!bs->drv) {
3293 return -ENOMEDIUM;
3294 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3295 return -EIO;
3296 } else if (bs->read_only) {
3297 return -EROFS;
3298 } else if (bs->drv->bdrv_co_discard) {
3299 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3300 } else if (bs->drv->bdrv_aio_discard) {
3301 BlockDriverAIOCB *acb;
3302 CoroutineIOCompletion co = {
3303 .coroutine = qemu_coroutine_self(),
3304 };
3305
3306 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3307 bdrv_co_io_em_complete, &co);
3308 if (acb == NULL) {
3309 return -EIO;
3310 } else {
3311 qemu_coroutine_yield();
3312 return co.ret;
3313 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003314 } else {
3315 return 0;
3316 }
3317}
3318
3319int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3320{
3321 Coroutine *co;
3322 RwCo rwco = {
3323 .bs = bs,
3324 .sector_num = sector_num,
3325 .nb_sectors = nb_sectors,
3326 .ret = NOT_DONE,
3327 };
3328
3329 if (qemu_in_coroutine()) {
3330 /* Fast-path if already in coroutine context */
3331 bdrv_discard_co_entry(&rwco);
3332 } else {
3333 co = qemu_coroutine_create(bdrv_discard_co_entry);
3334 qemu_coroutine_enter(co, &rwco);
3335 while (rwco.ret == NOT_DONE) {
3336 qemu_aio_wait();
3337 }
3338 }
3339
3340 return rwco.ret;
3341}
3342
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003343/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003344/* removable device support */
3345
3346/**
3347 * Return TRUE if the media is present
3348 */
3349int bdrv_is_inserted(BlockDriverState *bs)
3350{
3351 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003352
bellard19cb3732006-08-19 11:45:59 +00003353 if (!drv)
3354 return 0;
3355 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003356 return 1;
3357 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003358}
3359
3360/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003361 * Return whether the media changed since the last call to this
3362 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003363 */
3364int bdrv_media_changed(BlockDriverState *bs)
3365{
3366 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003367
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003368 if (drv && drv->bdrv_media_changed) {
3369 return drv->bdrv_media_changed(bs);
3370 }
3371 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003372}
3373
3374/**
3375 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3376 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003377void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003378{
3379 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003380
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003381 if (drv && drv->bdrv_eject) {
3382 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003383 }
bellard19cb3732006-08-19 11:45:59 +00003384}
3385
bellard19cb3732006-08-19 11:45:59 +00003386/**
3387 * Lock or unlock the media (if it is locked, the user won't be able
3388 * to eject it manually).
3389 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003390void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003391{
3392 BlockDriver *drv = bs->drv;
3393
Markus Armbruster025e8492011-09-06 18:58:47 +02003394 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003395
Markus Armbruster025e8492011-09-06 18:58:47 +02003396 if (drv && drv->bdrv_lock_medium) {
3397 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003398 }
3399}
ths985a03b2007-12-24 16:10:43 +00003400
3401/* needed for generic scsi interface */
3402
3403int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3404{
3405 BlockDriver *drv = bs->drv;
3406
3407 if (drv && drv->bdrv_ioctl)
3408 return drv->bdrv_ioctl(bs, req, buf);
3409 return -ENOTSUP;
3410}
aliguori7d780662009-03-12 19:57:08 +00003411
aliguori221f7152009-03-28 17:28:41 +00003412BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3413 unsigned long int req, void *buf,
3414 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003415{
aliguori221f7152009-03-28 17:28:41 +00003416 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003417
aliguori221f7152009-03-28 17:28:41 +00003418 if (drv && drv->bdrv_aio_ioctl)
3419 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3420 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003421}
aliguorie268ca52009-04-22 20:20:00 +00003422
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003423void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3424{
3425 bs->buffer_alignment = align;
3426}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003427
aliguorie268ca52009-04-22 20:20:00 +00003428void *qemu_blockalign(BlockDriverState *bs, size_t size)
3429{
3430 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3431}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003432
3433void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3434{
3435 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003436
Liran Schouraaa0eb72010-01-26 10:31:48 +02003437 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003438 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003439 if (!bs->dirty_bitmap) {
3440 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3441 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3442 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003443
Anthony Liguori7267c092011-08-20 22:09:37 -05003444 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003445 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003446 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003447 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003448 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003449 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003450 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003451 }
3452}
3453
3454int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3455{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003456 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003457
Jan Kiszkac6d22832009-11-30 18:21:20 +01003458 if (bs->dirty_bitmap &&
3459 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003460 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3461 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003462 } else {
3463 return 0;
3464 }
3465}
3466
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003467void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3468 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003469{
3470 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3471}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003472
3473int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3474{
3475 return bs->dirty_count;
3476}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003477
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003478void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3479{
3480 assert(bs->in_use != in_use);
3481 bs->in_use = in_use;
3482}
3483
3484int bdrv_in_use(BlockDriverState *bs)
3485{
3486 return bs->in_use;
3487}
3488
Luiz Capitulino28a72822011-09-26 17:43:50 -03003489void bdrv_iostatus_enable(BlockDriverState *bs)
3490{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003491 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003492 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003493}
3494
3495/* The I/O status is only enabled if the drive explicitly
3496 * enables it _and_ the VM is configured to stop on errors */
3497bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3498{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003499 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003500 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3501 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3502 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3503}
3504
3505void bdrv_iostatus_disable(BlockDriverState *bs)
3506{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003507 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003508}
3509
3510void bdrv_iostatus_reset(BlockDriverState *bs)
3511{
3512 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003513 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003514 }
3515}
3516
3517/* XXX: Today this is set by device models because it makes the implementation
3518 quite simple. However, the block layer knows about the error, so it's
3519 possible to implement this without device models being involved */
3520void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3521{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003522 if (bdrv_iostatus_is_enabled(bs) &&
3523 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003524 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003525 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3526 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003527 }
3528}
3529
Christoph Hellwiga597e792011-08-25 08:26:01 +02003530void
3531bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3532 enum BlockAcctType type)
3533{
3534 assert(type < BDRV_MAX_IOTYPE);
3535
3536 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003537 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003538 cookie->type = type;
3539}
3540
3541void
3542bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3543{
3544 assert(cookie->type < BDRV_MAX_IOTYPE);
3545
3546 bs->nr_bytes[cookie->type] += cookie->bytes;
3547 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003548 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003549}
3550
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003551int bdrv_img_create(const char *filename, const char *fmt,
3552 const char *base_filename, const char *base_fmt,
3553 char *options, uint64_t img_size, int flags)
3554{
3555 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003556 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003557 BlockDriverState *bs = NULL;
3558 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003559 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003560 int ret = 0;
3561
3562 /* Find driver and parse its options */
3563 drv = bdrv_find_format(fmt);
3564 if (!drv) {
3565 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003566 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003567 goto out;
3568 }
3569
3570 proto_drv = bdrv_find_protocol(filename);
3571 if (!proto_drv) {
3572 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003573 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003574 goto out;
3575 }
3576
3577 create_options = append_option_parameters(create_options,
3578 drv->create_options);
3579 create_options = append_option_parameters(create_options,
3580 proto_drv->create_options);
3581
3582 /* Create parameter list with default values */
3583 param = parse_option_parameters("", create_options, param);
3584
3585 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3586
3587 /* Parse -o options */
3588 if (options) {
3589 param = parse_option_parameters(options, create_options, param);
3590 if (param == NULL) {
3591 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003592 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003593 goto out;
3594 }
3595 }
3596
3597 if (base_filename) {
3598 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3599 base_filename)) {
3600 error_report("Backing file not supported for file format '%s'",
3601 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003602 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003603 goto out;
3604 }
3605 }
3606
3607 if (base_fmt) {
3608 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3609 error_report("Backing file format not supported for file "
3610 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003611 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003612 goto out;
3613 }
3614 }
3615
Jes Sorensen792da932010-12-16 13:52:17 +01003616 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3617 if (backing_file && backing_file->value.s) {
3618 if (!strcmp(filename, backing_file->value.s)) {
3619 error_report("Error: Trying to create an image with the "
3620 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003621 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003622 goto out;
3623 }
3624 }
3625
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003626 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3627 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003628 backing_drv = bdrv_find_format(backing_fmt->value.s);
3629 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003630 error_report("Unknown backing file format '%s'",
3631 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003632 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003633 goto out;
3634 }
3635 }
3636
3637 // The size for the image must always be specified, with one exception:
3638 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003639 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3640 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003641 if (backing_file && backing_file->value.s) {
3642 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003643 char buf[32];
3644
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003645 bs = bdrv_new("");
3646
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003647 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003648 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003649 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003650 goto out;
3651 }
3652 bdrv_get_geometry(bs, &size);
3653 size *= 512;
3654
3655 snprintf(buf, sizeof(buf), "%" PRId64, size);
3656 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3657 } else {
3658 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003659 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003660 goto out;
3661 }
3662 }
3663
3664 printf("Formatting '%s', fmt=%s ", filename, fmt);
3665 print_option_parameters(param);
3666 puts("");
3667
3668 ret = bdrv_create(drv, filename, param);
3669
3670 if (ret < 0) {
3671 if (ret == -ENOTSUP) {
3672 error_report("Formatting or formatting option not supported for "
3673 "file format '%s'", fmt);
3674 } else if (ret == -EFBIG) {
3675 error_report("The image size is too large for file format '%s'",
3676 fmt);
3677 } else {
3678 error_report("%s: error while creating %s: %s", filename, fmt,
3679 strerror(-ret));
3680 }
3681 }
3682
3683out:
3684 free_option_parameters(create_options);
3685 free_option_parameters(param);
3686
3687 if (bs) {
3688 bdrv_delete(bs);
3689 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003690
3691 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003692}