blob: 56426a936a3ae187cd4afe9f8a49196387cc7cf9 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Paolo Bonzini2f0c9fe2012-09-28 17:22:47 +020029#include "blockjob.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050030#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020031#include "qjson.h"
Paolo Bonzini3e1caa52012-09-28 17:22:57 +020032#include "sysemu.h"
Paolo Bonzinid7d512f2012-08-23 11:20:36 +020033#include "notify.h"
Kevin Wolf68485422011-06-30 10:05:46 +020034#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030035#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080036#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000037
Juan Quintela71e72a12009-07-27 16:12:56 +020038#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000039#include <sys/types.h>
40#include <sys/stat.h>
41#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000042#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000043#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000044#include <sys/disk.h>
45#endif
blueswir1c5e97232009-03-07 20:06:23 +000046#endif
bellard7674e7b2005-04-26 21:59:26 +000047
aliguori49dc7682009-03-08 16:26:59 +000048#ifdef _WIN32
49#include <windows.h>
50#endif
51
/* Placeholder result value used while an emulated synchronous operation is
 * still in progress. */
#define NOT_DONE 0x7fffffff

/* Bit flags that can be passed along with a read/write request. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
58
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020059static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000062 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000063static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
64 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000065 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020066static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
69static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
70 int64_t sector_num, int nb_sectors,
71 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010072static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010075static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000076 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
77 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010078static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
79 int64_t sector_num,
80 QEMUIOVector *qiov,
81 int nb_sectors,
82 BlockDriverCompletionFunc *cb,
83 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010084 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010085static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010086static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
87 int64_t sector_num, int nb_sectors);
bellardec530c82006-04-25 22:36:06 +000088
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080089static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
90 bool is_write, double elapsed_time, uint64_t *wait);
91static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
92 double elapsed_time, uint64_t *wait);
93static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
94 bool is_write, int64_t *wait);
95
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010096static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
97 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000098
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010099static QLIST_HEAD(, BlockDriver) bdrv_drivers =
100 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000101
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200102/* The device to use for VM snapshots */
103static BlockDriverState *bs_snapshots;
104
Markus Armbrustereb852012009-10-27 18:41:44 +0100105/* If non-zero, use only whitelisted block drivers */
106static int use_bdrv_whitelist;
107
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000108#ifdef _WIN32
/*
 * Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_lower = letter >= 'a' && letter <= 'z';
    int is_upper = letter >= 'A' && letter <= 'Z';

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
115
/*
 * Return 1 when filename is exactly a drive specification ("d:") or starts
 * with one of the device prefixes checked below; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    return (is_windows_drive_prefix(filename) && filename[2] == '\0') ||
           strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
126#endif
127
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800128/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800129void bdrv_io_limits_disable(BlockDriverState *bs)
130{
131 bs->io_limits_enabled = false;
132
133 while (qemu_co_queue_next(&bs->throttled_reqs));
134
135 if (bs->block_timer) {
136 qemu_del_timer(bs->block_timer);
137 qemu_free_timer(bs->block_timer);
138 bs->block_timer = NULL;
139 }
140
141 bs->slice_start = 0;
142 bs->slice_end = 0;
143 bs->slice_time = 0;
144 memset(&bs->io_base, 0, sizeof(bs->io_base));
145}
146
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800147static void bdrv_block_timer(void *opaque)
148{
149 BlockDriverState *bs = opaque;
150
151 qemu_co_queue_next(&bs->throttled_reqs);
152}
153
154void bdrv_io_limits_enable(BlockDriverState *bs)
155{
156 qemu_co_queue_init(&bs->throttled_reqs);
157 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
158 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
159 bs->slice_start = qemu_get_clock_ns(vm_clock);
160 bs->slice_end = bs->slice_start + bs->slice_time;
161 memset(&bs->io_base, 0, sizeof(bs->io_base));
162 bs->io_limits_enabled = true;
163}
164
165bool bdrv_io_limits_enabled(BlockDriverState *bs)
166{
167 BlockIOLimit *io_limits = &bs->io_limits;
168 return io_limits->bps[BLOCK_IO_LIMIT_READ]
169 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
171 || io_limits->iops[BLOCK_IO_LIMIT_READ]
172 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
173 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
174}
175
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800176static void bdrv_io_limits_intercept(BlockDriverState *bs,
177 bool is_write, int nb_sectors)
178{
179 int64_t wait_time = -1;
180
181 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
182 qemu_co_queue_wait(&bs->throttled_reqs);
183 }
184
185 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
186 * throttled requests will not be dequeued until the current request is
187 * allowed to be serviced. So if the current request still exceeds the
188 * limits, it will be inserted to the head. All requests followed it will
189 * be still in throttled_reqs queue.
190 */
191
192 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
193 qemu_mod_timer(bs->block_timer,
194 wait_time + qemu_get_clock_ns(vm_clock));
195 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
196 }
197
198 qemu_co_queue_next(&bs->throttled_reqs);
199}
200
/*
 * Check whether path starts with "<protocol>:".
 *
 * The first ':' must come before any directory separator. On Windows, drive
 * specifications and device paths are never treated as protocols.
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Look at the first colon-or-separator; a colon means a protocol. */
    return path[strcspn(path, stop_chars)] == ':';
}
218
/*
 * Return non-zero when path is absolute.
 *
 * A leading '/' always counts. On Windows a leading backslash, a drive
 * prefix, or a device name such as "\\.\d:" counts as well.
 */
int path_is_absolute(const char *path)
{
    char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
231
/*
 * Build in dest (capacity dest_size bytes) the path of filename interpreted
 * relative to base_path.
 *
 * If filename is absolute it is copied unchanged. Otherwise the leading part
 * of base_path is kept, up to and including whichever comes later of its
 * first ':' and its last directory separator, and filename is appended to
 * it. URLs are supported. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Candidate cut point: just past the first ':' if there is one. */
    colon = strchr(base_path, ':');
    cut = colon ? colon + 1 : base_path;

    /* Candidate cut point: just past the last directory separator. */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!sep || backslash > sep) {
            sep = backslash;
        }
    }
#endif
    sep = sep ? sep + 1 : base_path;

    /* Keep the longer of the two prefixes. */
    if (sep > cut) {
        cut = sep;
    }

    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
275
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200276void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
277{
278 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
279 pstrcpy(dest, sz, bs->backing_file);
280 } else {
281 path_combine(dest, sz, bs->filename, bs->backing_file);
282 }
283}
284
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500285void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000286{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100287 /* Block drivers without coroutine functions need emulation */
288 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200289 bdrv->bdrv_co_readv = bdrv_co_readv_em;
290 bdrv->bdrv_co_writev = bdrv_co_writev_em;
291
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100292 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
293 * the block driver lacks aio we need to emulate that too.
294 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200295 if (!bdrv->bdrv_aio_readv) {
296 /* add AIO emulation layer */
297 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
298 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200299 }
bellard83f64092006-08-01 16:21:11 +0000300 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200301
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100302 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000303}
bellardb3380822004-03-14 21:38:54 +0000304
305/* create a new block device (by default it is empty) */
306BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000307{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100308 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000309
Anthony Liguori7267c092011-08-20 22:09:37 -0500310 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000311 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000312 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100313 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000314 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300315 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200316 notifier_list_init(&bs->close_notifiers);
317
bellardb3380822004-03-14 21:38:54 +0000318 return bs;
319}
320
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200321void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
322{
323 notifier_list_add(&bs->close_notifiers, notify);
324}
325
bellardea2384d2004-08-01 21:59:26 +0000326BlockDriver *bdrv_find_format(const char *format_name)
327{
328 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100329 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
330 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000331 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100332 }
bellardea2384d2004-08-01 21:59:26 +0000333 }
334 return NULL;
335}
336
Markus Armbrustereb852012009-10-27 18:41:44 +0100337static int bdrv_is_whitelisted(BlockDriver *drv)
338{
339 static const char *whitelist[] = {
340 CONFIG_BDRV_WHITELIST
341 };
342 const char **p;
343
344 if (!whitelist[0])
345 return 1; /* no whitelist, anything goes */
346
347 for (p = whitelist; *p; p++) {
348 if (!strcmp(drv->format_name, *p)) {
349 return 1;
350 }
351 }
352 return 0;
353}
354
355BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
356{
357 BlockDriver *drv = bdrv_find_format(format_name);
358 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
359}
360
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800361typedef struct CreateCo {
362 BlockDriver *drv;
363 char *filename;
364 QEMUOptionParameter *options;
365 int ret;
366} CreateCo;
367
368static void coroutine_fn bdrv_create_co_entry(void *opaque)
369{
370 CreateCo *cco = opaque;
371 assert(cco->drv);
372
373 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
374}
375
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200376int bdrv_create(BlockDriver *drv, const char* filename,
377 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000378{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800379 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200380
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800381 Coroutine *co;
382 CreateCo cco = {
383 .drv = drv,
384 .filename = g_strdup(filename),
385 .options = options,
386 .ret = NOT_DONE,
387 };
388
389 if (!drv->bdrv_create) {
390 return -ENOTSUP;
391 }
392
393 if (qemu_in_coroutine()) {
394 /* Fast-path if already in coroutine context */
395 bdrv_create_co_entry(&cco);
396 } else {
397 co = qemu_coroutine_create(bdrv_create_co_entry);
398 qemu_coroutine_enter(co, &cco);
399 while (cco.ret == NOT_DONE) {
400 qemu_aio_wait();
401 }
402 }
403
404 ret = cco.ret;
405 g_free(cco.filename);
406
407 return ret;
bellardea2384d2004-08-01 21:59:26 +0000408}
409
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200410int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
411{
412 BlockDriver *drv;
413
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900414 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200415 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000416 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200417 }
418
419 return bdrv_create(drv, filename, options);
420}
421
/*
 * Create a uniquely-named empty temporary file.
 *
 * filename: output buffer that receives the name of the created file
 * size:     capacity of filename in bytes (on Windows at least MAX_PATH)
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code). -EOVERFLOW is returned when the buffer is
 * too small for the generated name.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    /* Honour TMPDIR, falling back to /tmp when it is unset. */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so capture close()'s error first. */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000456
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200457/*
458 * Detect host devices. By convention, /dev/cdrom[N] is always
459 * recognized as a host CDROM.
460 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200461static BlockDriver *find_hdev_driver(const char *filename)
462{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200463 int score_max = 0, score;
464 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200465
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100466 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200467 if (d->bdrv_probe_device) {
468 score = d->bdrv_probe_device(filename);
469 if (score > score_max) {
470 score_max = score;
471 drv = d;
472 }
473 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200474 }
475
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200476 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200477}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200478
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900479BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480{
481 BlockDriver *drv1;
482 char protocol[128];
483 int len;
484 const char *p;
485
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200486 /* TODO Drivers without bdrv_file_open must be specified explicitly */
487
Christoph Hellwig39508e72010-06-23 12:25:17 +0200488 /*
489 * XXX(hch): we really should not let host device detection
490 * override an explicit protocol specification, but moving this
491 * later breaks access to device names with colons in them.
492 * Thanks to the brain-dead persistent naming schemes on udev-
493 * based Linux systems those actually are quite common.
494 */
495 drv1 = find_hdev_driver(filename);
496 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200497 return drv1;
498 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200499
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000500 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200501 return bdrv_find_format("file");
502 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000503 p = strchr(filename, ':');
504 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200505 len = p - filename;
506 if (len > sizeof(protocol) - 1)
507 len = sizeof(protocol) - 1;
508 memcpy(protocol, filename, len);
509 protocol[len] = '\0';
510 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
511 if (drv1->protocol_name &&
512 !strcmp(drv1->protocol_name, protocol)) {
513 return drv1;
514 }
515 }
516 return NULL;
517}
518
Stefan Weilc98ac352010-07-21 21:51:51 +0200519static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000520{
bellard83f64092006-08-01 16:21:11 +0000521 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000522 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000523 uint8_t buf[2048];
524 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000525
Naphtali Spreif5edb012010-01-17 16:48:13 +0200526 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200527 if (ret < 0) {
528 *pdrv = NULL;
529 return ret;
530 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700531
Kevin Wolf08a00552010-06-01 18:37:31 +0200532 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
533 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700534 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200535 drv = bdrv_find_format("raw");
536 if (!drv) {
537 ret = -ENOENT;
538 }
539 *pdrv = drv;
540 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700541 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700542
bellard83f64092006-08-01 16:21:11 +0000543 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
544 bdrv_delete(bs);
545 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200546 *pdrv = NULL;
547 return ret;
bellard83f64092006-08-01 16:21:11 +0000548 }
549
bellardea2384d2004-08-01 21:59:26 +0000550 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200551 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100552 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000553 if (drv1->bdrv_probe) {
554 score = drv1->bdrv_probe(buf, ret, filename);
555 if (score > score_max) {
556 score_max = score;
557 drv = drv1;
558 }
bellardea2384d2004-08-01 21:59:26 +0000559 }
560 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200561 if (!drv) {
562 ret = -ENOENT;
563 }
564 *pdrv = drv;
565 return ret;
bellardea2384d2004-08-01 21:59:26 +0000566}
567
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100568/**
569 * Set the current 'total_sectors' value
570 */
571static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
572{
573 BlockDriver *drv = bs->drv;
574
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700575 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
576 if (bs->sg)
577 return 0;
578
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100579 /* query actual device if possible, otherwise just trust the hint */
580 if (drv->bdrv_getlength) {
581 int64_t length = drv->bdrv_getlength(bs);
582 if (length < 0) {
583 return length;
584 }
585 hint = length >> BDRV_SECTOR_BITS;
586 }
587
588 bs->total_sectors = hint;
589 return 0;
590}
591
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100592/**
593 * Set open flags for a given cache mode
594 *
595 * Return 0 on success, -1 if the cache mode was invalid.
596 */
597int bdrv_parse_cache_flags(const char *mode, int *flags)
598{
599 *flags &= ~BDRV_O_CACHE_MASK;
600
601 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
602 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100603 } else if (!strcmp(mode, "directsync")) {
604 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100605 } else if (!strcmp(mode, "writeback")) {
606 *flags |= BDRV_O_CACHE_WB;
607 } else if (!strcmp(mode, "unsafe")) {
608 *flags |= BDRV_O_CACHE_WB;
609 *flags |= BDRV_O_NO_FLUSH;
610 } else if (!strcmp(mode, "writethrough")) {
611 /* this is the default */
612 } else {
613 return -1;
614 }
615
616 return 0;
617}
618
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000619/**
620 * The copy-on-read flag is actually a reference count so multiple users may
621 * use the feature without worrying about clobbering its previous state.
622 * Copy-on-read stays enabled until all users have called to disable it.
623 */
624void bdrv_enable_copy_on_read(BlockDriverState *bs)
625{
626 bs->copy_on_read++;
627}
628
629void bdrv_disable_copy_on_read(BlockDriverState *bs)
630{
631 assert(bs->copy_on_read > 0);
632 bs->copy_on_read--;
633}
634
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200635/*
Kevin Wolf57915332010-04-14 15:24:50 +0200636 * Common part for opening disk images and files
637 */
638static int bdrv_open_common(BlockDriverState *bs, const char *filename,
639 int flags, BlockDriver *drv)
640{
641 int ret, open_flags;
642
643 assert(drv != NULL);
Paolo Bonzini64058752012-05-08 16:51:49 +0200644 assert(bs->file == NULL);
Kevin Wolf57915332010-04-14 15:24:50 +0200645
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100646 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
647
Kevin Wolf57915332010-04-14 15:24:50 +0200648 bs->open_flags = flags;
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->buffer_alignment = 512;
650
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000651 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
652 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
653 bdrv_enable_copy_on_read(bs);
654 }
655
Kevin Wolf57915332010-04-14 15:24:50 +0200656 pstrcpy(bs->filename, sizeof(bs->filename), filename);
657
658 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
659 return -ENOTSUP;
660 }
661
662 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500663 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200664
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100665 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Paolo Bonzinie1e9b0a2012-06-06 00:04:53 +0200666 open_flags = flags | BDRV_O_CACHE_WB;
Kevin Wolf57915332010-04-14 15:24:50 +0200667
668 /*
669 * Clear flags that are internal to the block layer before opening the
670 * image.
671 */
Paolo Bonzinie1e9b0a2012-06-06 00:04:53 +0200672 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Kevin Wolf57915332010-04-14 15:24:50 +0200673
674 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200675 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200676 */
677 if (bs->is_temporary) {
678 open_flags |= BDRV_O_RDWR;
679 }
680
Jeff Codybe028ad2012-09-20 15:13:17 -0400681 bs->read_only = !(open_flags & BDRV_O_RDWR);
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100682
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200683 /* Open the image, either directly or using a protocol */
684 if (drv->bdrv_file_open) {
685 ret = drv->bdrv_file_open(bs, filename, open_flags);
686 } else {
687 ret = bdrv_file_open(&bs->file, filename, open_flags);
688 if (ret >= 0) {
689 ret = drv->bdrv_open(bs, open_flags);
690 }
691 }
692
Kevin Wolf57915332010-04-14 15:24:50 +0200693 if (ret < 0) {
694 goto free_and_fail;
695 }
696
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100697 ret = refresh_total_sectors(bs, bs->total_sectors);
698 if (ret < 0) {
699 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200700 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100701
Kevin Wolf57915332010-04-14 15:24:50 +0200702#ifndef _WIN32
703 if (bs->is_temporary) {
704 unlink(filename);
705 }
706#endif
707 return 0;
708
709free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200710 if (bs->file) {
711 bdrv_delete(bs->file);
712 bs->file = NULL;
713 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500714 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200715 bs->opaque = NULL;
716 bs->drv = NULL;
717 return ret;
718}
719
720/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200721 * Opens a file using a protocol (file, host_device, nbd, ...)
722 */
bellard83f64092006-08-01 16:21:11 +0000723int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000724{
bellard83f64092006-08-01 16:21:11 +0000725 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200726 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000727 int ret;
728
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900729 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200730 if (!drv) {
731 return -ENOENT;
732 }
733
bellard83f64092006-08-01 16:21:11 +0000734 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200735 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000736 if (ret < 0) {
737 bdrv_delete(bs);
738 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000739 }
aliguori71d07702009-03-03 17:37:16 +0000740 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000741 *pbs = bs;
742 return 0;
bellardea2384d2004-08-01 21:59:26 +0000743}
bellardfc01f7e2003-06-30 10:03:06 +0000744
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200745/*
746 * Opens a disk image (raw, qcow2, vmdk, ...)
747 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200748int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
749 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000750{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200752 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000753
bellard83f64092006-08-01 16:21:11 +0000754 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000755 BlockDriverState *bs1;
756 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000757 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200758 BlockDriver *bdrv_qcow2;
759 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200760 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000761
bellardea2384d2004-08-01 21:59:26 +0000762 /* if snapshot, we create a temporary backing file and open it
763 instead of opening 'filename' directly */
764
765 /* if there is a backing file, use it */
766 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200767 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000768 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000769 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000770 return ret;
bellardea2384d2004-08-01 21:59:26 +0000771 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200772 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000773
774 if (bs1->drv && bs1->drv->protocol_name)
775 is_protocol = 1;
776
bellardea2384d2004-08-01 21:59:26 +0000777 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000778
Jim Meyeringeba25052012-05-28 09:27:54 +0200779 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
780 if (ret < 0) {
781 return ret;
782 }
aliguori7c96d462008-09-12 17:54:13 +0000783
784 /* Real path is meaningless for protocols */
785 if (is_protocol)
786 snprintf(backing_filename, sizeof(backing_filename),
787 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000788 else if (!realpath(filename, backing_filename))
789 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000790
Kevin Wolf91a073a2009-05-27 14:48:06 +0200791 bdrv_qcow2 = bdrv_find_format("qcow2");
792 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
793
Jes Sorensen3e829902010-05-27 16:20:30 +0200794 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200795 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
796 if (drv) {
797 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
798 drv->format_name);
799 }
800
801 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200802 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000803 if (ret < 0) {
804 return ret;
bellardea2384d2004-08-01 21:59:26 +0000805 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200806
bellardea2384d2004-08-01 21:59:26 +0000807 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200808 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000809 bs->is_temporary = 1;
810 }
bellard712e7872005-04-28 21:09:32 +0000811
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200812 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200813 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200814 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000815 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100816
aliguori51d7c002009-03-05 23:00:29 +0000817 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000818 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000819 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200820
Jeff Codybe028ad2012-09-20 15:13:17 -0400821 if (flags & BDRV_O_RDWR) {
822 flags |= BDRV_O_ALLOW_RDWR;
823 }
824
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200825 /* Open the image */
826 ret = bdrv_open_common(bs, filename, flags, drv);
827 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100828 goto unlink_and_fail;
829 }
830
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200831 /* If there is a backing file, use it */
832 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
833 char backing_filename[PATH_MAX];
834 int back_flags;
835 BlockDriver *back_drv = NULL;
836
837 bs->backing_hd = bdrv_new("");
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200838 bdrv_get_full_backing_filename(bs, backing_filename,
839 sizeof(backing_filename));
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000840
841 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200842 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000843 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200844
845 /* backing files always opened read-only */
846 back_flags =
847 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
848
849 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
850 if (ret < 0) {
851 bdrv_close(bs);
852 return ret;
853 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200854 }
855
856 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200857 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200858 }
859
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800860 /* throttling disk I/O limits */
861 if (bs->io_limits_enabled) {
862 bdrv_io_limits_enable(bs);
863 }
864
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200865 return 0;
866
867unlink_and_fail:
868 if (bs->is_temporary) {
869 unlink(filename);
870 }
871 return ret;
872}
873
Jeff Codye971aa12012-09-20 15:13:19 -0400874typedef struct BlockReopenQueueEntry {
875 bool prepared;
876 BDRVReopenState state;
877 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
878} BlockReopenQueueEntry;
879
880/*
881 * Adds a BlockDriverState to a simple queue for an atomic, transactional
882 * reopen of multiple devices.
883 *
884 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
885 * already performed, or alternatively may be NULL a new BlockReopenQueue will
886 * be created and initialized. This newly created BlockReopenQueue should be
887 * passed back in for subsequent calls that are intended to be of the same
888 * atomic 'set'.
889 *
890 * bs is the BlockDriverState to add to the reopen queue.
891 *
892 * flags contains the open flags for the associated bs
893 *
894 * returns a pointer to bs_queue, which is either the newly allocated
895 * bs_queue, or the existing bs_queue being used.
896 *
897 */
898BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
899 BlockDriverState *bs, int flags)
900{
901 assert(bs != NULL);
902
903 BlockReopenQueueEntry *bs_entry;
904 if (bs_queue == NULL) {
905 bs_queue = g_new0(BlockReopenQueue, 1);
906 QSIMPLEQ_INIT(bs_queue);
907 }
908
909 if (bs->file) {
910 bdrv_reopen_queue(bs_queue, bs->file, flags);
911 }
912
913 bs_entry = g_new0(BlockReopenQueueEntry, 1);
914 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
915
916 bs_entry->state.bs = bs;
917 bs_entry->state.flags = flags;
918
919 return bs_queue;
920}
921
922/*
923 * Reopen multiple BlockDriverStates atomically & transactionally.
924 *
925 * The queue passed in (bs_queue) must have been built up previous
926 * via bdrv_reopen_queue().
927 *
928 * Reopens all BDS specified in the queue, with the appropriate
929 * flags. All devices are prepared for reopen, and failure of any
930 * device will cause all device changes to be abandonded, and intermediate
931 * data cleaned up.
932 *
933 * If all devices prepare successfully, then the changes are committed
934 * to all devices.
935 *
936 */
937int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
938{
939 int ret = -1;
940 BlockReopenQueueEntry *bs_entry, *next;
941 Error *local_err = NULL;
942
943 assert(bs_queue != NULL);
944
945 bdrv_drain_all();
946
947 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
948 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
949 error_propagate(errp, local_err);
950 goto cleanup;
951 }
952 bs_entry->prepared = true;
953 }
954
955 /* If we reach this point, we have success and just need to apply the
956 * changes
957 */
958 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
959 bdrv_reopen_commit(&bs_entry->state);
960 }
961
962 ret = 0;
963
964cleanup:
965 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
966 if (ret && bs_entry->prepared) {
967 bdrv_reopen_abort(&bs_entry->state);
968 }
969 g_free(bs_entry);
970 }
971 g_free(bs_queue);
972 return ret;
973}
974
975
976/* Reopen a single BlockDriverState with the specified flags. */
977int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
978{
979 int ret = -1;
980 Error *local_err = NULL;
981 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
982
983 ret = bdrv_reopen_multiple(queue, &local_err);
984 if (local_err != NULL) {
985 error_propagate(errp, local_err);
986 }
987 return ret;
988}
989
990
991/*
992 * Prepares a BlockDriverState for reopen. All changes are staged in the
993 * 'opaque' field of the BDRVReopenState, which is used and allocated by
994 * the block driver layer .bdrv_reopen_prepare()
995 *
996 * bs is the BlockDriverState to reopen
997 * flags are the new open flags
998 * queue is the reopen queue
999 *
1000 * Returns 0 on success, non-zero on error. On error errp will be set
1001 * as well.
1002 *
1003 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1004 * It is the responsibility of the caller to then call the abort() or
1005 * commit() for any other BDS that have been left in a prepare() state
1006 *
1007 */
1008int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1009 Error **errp)
1010{
1011 int ret = -1;
1012 Error *local_err = NULL;
1013 BlockDriver *drv;
1014
1015 assert(reopen_state != NULL);
1016 assert(reopen_state->bs->drv != NULL);
1017 drv = reopen_state->bs->drv;
1018
1019 /* if we are to stay read-only, do not allow permission change
1020 * to r/w */
1021 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1022 reopen_state->flags & BDRV_O_RDWR) {
1023 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1024 reopen_state->bs->device_name);
1025 goto error;
1026 }
1027
1028
1029 ret = bdrv_flush(reopen_state->bs);
1030 if (ret) {
1031 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1032 strerror(-ret));
1033 goto error;
1034 }
1035
1036 if (drv->bdrv_reopen_prepare) {
1037 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1038 if (ret) {
1039 if (local_err != NULL) {
1040 error_propagate(errp, local_err);
1041 } else {
1042 error_set(errp, QERR_OPEN_FILE_FAILED,
1043 reopen_state->bs->filename);
1044 }
1045 goto error;
1046 }
1047 } else {
1048 /* It is currently mandatory to have a bdrv_reopen_prepare()
1049 * handler for each supported drv. */
1050 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1051 drv->format_name, reopen_state->bs->device_name,
1052 "reopening of file");
1053 ret = -1;
1054 goto error;
1055 }
1056
1057 ret = 0;
1058
1059error:
1060 return ret;
1061}
1062
1063/*
1064 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1065 * makes them final by swapping the staging BlockDriverState contents into
1066 * the active BlockDriverState contents.
1067 */
1068void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1069{
1070 BlockDriver *drv;
1071
1072 assert(reopen_state != NULL);
1073 drv = reopen_state->bs->drv;
1074 assert(drv != NULL);
1075
1076 /* If there are any driver level actions to take */
1077 if (drv->bdrv_reopen_commit) {
1078 drv->bdrv_reopen_commit(reopen_state);
1079 }
1080
1081 /* set BDS specific flags now */
1082 reopen_state->bs->open_flags = reopen_state->flags;
1083 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1084 BDRV_O_CACHE_WB);
1085 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1086}
1087
1088/*
1089 * Abort the reopen, and delete and free the staged changes in
1090 * reopen_state
1091 */
1092void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1093{
1094 BlockDriver *drv;
1095
1096 assert(reopen_state != NULL);
1097 drv = reopen_state->bs->drv;
1098 assert(drv != NULL);
1099
1100 if (drv->bdrv_reopen_abort) {
1101 drv->bdrv_reopen_abort(reopen_state);
1102 }
1103}
1104
1105
bellardfc01f7e2003-06-30 10:03:06 +00001106void bdrv_close(BlockDriverState *bs)
1107{
Liu Yuan80ccf932012-04-20 17:10:56 +08001108 bdrv_flush(bs);
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001109 if (bs->job) {
1110 block_job_cancel_sync(bs->job);
1111 }
1112 bdrv_drain_all();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001113 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001114
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001115 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02001116 if (bs == bs_snapshots) {
1117 bs_snapshots = NULL;
1118 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001119 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +00001120 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001121 bs->backing_hd = NULL;
1122 }
bellardea2384d2004-08-01 21:59:26 +00001123 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001124 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001125#ifdef _WIN32
1126 if (bs->is_temporary) {
1127 unlink(bs->filename);
1128 }
bellard67b915a2004-03-31 23:37:16 +00001129#endif
bellardea2384d2004-08-01 21:59:26 +00001130 bs->opaque = NULL;
1131 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001132 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001133 bs->backing_file[0] = '\0';
1134 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001135 bs->total_sectors = 0;
1136 bs->encrypted = 0;
1137 bs->valid_key = 0;
1138 bs->sg = 0;
1139 bs->growable = 0;
bellardb3380822004-03-14 21:38:54 +00001140
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001141 if (bs->file != NULL) {
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001142 bdrv_delete(bs->file);
1143 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001144 }
bellardb3380822004-03-14 21:38:54 +00001145 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001146
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001147 bdrv_dev_change_media_cb(bs, false);
1148
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001149 /*throttling disk I/O limits*/
1150 if (bs->io_limits_enabled) {
1151 bdrv_io_limits_disable(bs);
1152 }
bellardb3380822004-03-14 21:38:54 +00001153}
1154
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001155void bdrv_close_all(void)
1156{
1157 BlockDriverState *bs;
1158
1159 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1160 bdrv_close(bs);
1161 }
1162}
1163
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001164/*
1165 * Wait for pending requests to complete across all BlockDriverStates
1166 *
1167 * This function does not flush data to disk, use bdrv_flush_all() for that
1168 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001169 *
1170 * Note that completion of an asynchronous I/O operation can trigger any
1171 * number of other I/O operations on other devices---for example a coroutine
1172 * can be arbitrarily complex and a constant flow of I/O can come until the
1173 * coroutine is complete. Because of this, it is not possible to have a
1174 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001175 */
1176void bdrv_drain_all(void)
1177{
1178 BlockDriverState *bs;
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001179 bool busy;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001180
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001181 do {
1182 busy = qemu_aio_wait();
1183
1184 /* FIXME: We do not have timer support here, so this is effectively
1185 * a busy wait.
1186 */
1187 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1188 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
1189 qemu_co_queue_restart_all(&bs->throttled_reqs);
1190 busy = true;
1191 }
1192 }
1193 } while (busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001194
1195 /* If requests are still pending there is a bug somewhere */
1196 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1197 assert(QLIST_EMPTY(&bs->tracked_requests));
1198 assert(qemu_co_queue_empty(&bs->throttled_reqs));
1199 }
1200}
1201
Ryan Harperd22b2f42011-03-29 20:51:47 -05001202/* make a BlockDriverState anonymous by removing from bdrv_state list.
1203 Also, NULL terminate the device_name to prevent double remove */
1204void bdrv_make_anon(BlockDriverState *bs)
1205{
1206 if (bs->device_name[0] != '\0') {
1207 QTAILQ_REMOVE(&bdrv_states, bs, list);
1208 }
1209 bs->device_name[0] = '\0';
1210}
1211
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001212static void bdrv_rebind(BlockDriverState *bs)
1213{
1214 if (bs->drv && bs->drv->bdrv_rebind) {
1215 bs->drv->bdrv_rebind(bs);
1216 }
1217}
1218
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001219static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1220 BlockDriverState *bs_src)
1221{
1222 /* move some fields that need to stay attached to the device */
1223 bs_dest->open_flags = bs_src->open_flags;
1224
1225 /* dev info */
1226 bs_dest->dev_ops = bs_src->dev_ops;
1227 bs_dest->dev_opaque = bs_src->dev_opaque;
1228 bs_dest->dev = bs_src->dev;
1229 bs_dest->buffer_alignment = bs_src->buffer_alignment;
1230 bs_dest->copy_on_read = bs_src->copy_on_read;
1231
1232 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1233
1234 /* i/o timing parameters */
1235 bs_dest->slice_time = bs_src->slice_time;
1236 bs_dest->slice_start = bs_src->slice_start;
1237 bs_dest->slice_end = bs_src->slice_end;
1238 bs_dest->io_limits = bs_src->io_limits;
1239 bs_dest->io_base = bs_src->io_base;
1240 bs_dest->throttled_reqs = bs_src->throttled_reqs;
1241 bs_dest->block_timer = bs_src->block_timer;
1242 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1243
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001244 /* r/w error */
1245 bs_dest->on_read_error = bs_src->on_read_error;
1246 bs_dest->on_write_error = bs_src->on_write_error;
1247
1248 /* i/o status */
1249 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1250 bs_dest->iostatus = bs_src->iostatus;
1251
1252 /* dirty bitmap */
1253 bs_dest->dirty_count = bs_src->dirty_count;
1254 bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
1255
1256 /* job */
1257 bs_dest->in_use = bs_src->in_use;
1258 bs_dest->job = bs_src->job;
1259
1260 /* keep the same entry in bdrv_states */
1261 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1262 bs_src->device_name);
1263 bs_dest->list = bs_src->list;
1264}
1265
1266/*
1267 * Swap bs contents for two image chains while they are live,
1268 * while keeping required fields on the BlockDriverState that is
1269 * actually attached to a device.
1270 *
1271 * This will modify the BlockDriverState fields, and swap contents
1272 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1273 *
1274 * bs_new is required to be anonymous.
1275 *
1276 * This function does not create any image files.
1277 */
1278void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1279{
1280 BlockDriverState tmp;
1281
1282 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1283 assert(bs_new->device_name[0] == '\0');
1284 assert(bs_new->dirty_bitmap == NULL);
1285 assert(bs_new->job == NULL);
1286 assert(bs_new->dev == NULL);
1287 assert(bs_new->in_use == 0);
1288 assert(bs_new->io_limits_enabled == false);
1289 assert(bs_new->block_timer == NULL);
1290
1291 tmp = *bs_new;
1292 *bs_new = *bs_old;
1293 *bs_old = tmp;
1294
1295 /* there are some fields that should not be swapped, move them back */
1296 bdrv_move_feature_fields(&tmp, bs_old);
1297 bdrv_move_feature_fields(bs_old, bs_new);
1298 bdrv_move_feature_fields(bs_new, &tmp);
1299
1300 /* bs_new shouldn't be in bdrv_states even after the swap! */
1301 assert(bs_new->device_name[0] == '\0');
1302
1303 /* Check a few fields that should remain attached to the device */
1304 assert(bs_new->dev == NULL);
1305 assert(bs_new->job == NULL);
1306 assert(bs_new->in_use == 0);
1307 assert(bs_new->io_limits_enabled == false);
1308 assert(bs_new->block_timer == NULL);
1309
1310 bdrv_rebind(bs_new);
1311 bdrv_rebind(bs_old);
1312}
1313
Jeff Cody8802d1f2012-02-28 15:54:06 -05001314/*
1315 * Add new bs contents at the top of an image chain while the chain is
1316 * live, while keeping required fields on the top layer.
1317 *
1318 * This will modify the BlockDriverState fields, and swap contents
1319 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1320 *
Jeff Codyf6801b82012-03-27 16:30:19 -04001321 * bs_new is required to be anonymous.
1322 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05001323 * This function does not create any image files.
1324 */
1325void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1326{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001327 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001328
1329 /* The contents of 'tmp' will become bs_top, as we are
1330 * swapping bs_new and bs_top contents. */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001331 bs_top->backing_hd = bs_new;
1332 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1333 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1334 bs_new->filename);
1335 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1336 bs_new->drv ? bs_new->drv->format_name : "");
Jeff Cody8802d1f2012-02-28 15:54:06 -05001337}
1338
bellardb3380822004-03-14 21:38:54 +00001339void bdrv_delete(BlockDriverState *bs)
1340{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001341 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001342 assert(!bs->job);
1343 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +02001344
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001345 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001346 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001347
bellardb3380822004-03-14 21:38:54 +00001348 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001349
Markus Armbrusterf9092b12010-06-25 10:33:39 +02001350 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -05001351 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001352}
1353
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001354int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1355/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001356{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001357 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001358 return -EBUSY;
1359 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001360 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001361 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001362 return 0;
1363}
1364
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001365/* TODO qdevified devices don't use this, remove when devices are qdevified */
1366void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001367{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001368 if (bdrv_attach_dev(bs, dev) < 0) {
1369 abort();
1370 }
1371}
1372
1373void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1374/* TODO change to DeviceState *dev when all users are qdevified */
1375{
1376 assert(bs->dev == dev);
1377 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001378 bs->dev_ops = NULL;
1379 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001380 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001381}
1382
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001383/* TODO change to return DeviceState * when all users are qdevified */
1384void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001385{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001386 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001387}
1388
Markus Armbruster0e49de52011-08-03 15:07:41 +02001389void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1390 void *opaque)
1391{
1392 bs->dev_ops = ops;
1393 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001394 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1395 bs_snapshots = NULL;
1396 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001397}
1398
Paolo Bonzini32c81a42012-09-28 17:22:58 +02001399void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1400 enum MonitorEvent ev,
1401 BlockErrorAction action, bool is_read)
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001402{
1403 QObject *data;
1404 const char *action_str;
1405
1406 switch (action) {
1407 case BDRV_ACTION_REPORT:
1408 action_str = "report";
1409 break;
1410 case BDRV_ACTION_IGNORE:
1411 action_str = "ignore";
1412 break;
1413 case BDRV_ACTION_STOP:
1414 action_str = "stop";
1415 break;
1416 default:
1417 abort();
1418 }
1419
1420 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1421 bdrv->device_name,
1422 action_str,
1423 is_read ? "read" : "write");
Paolo Bonzini32c81a42012-09-28 17:22:58 +02001424 monitor_protocol_event(ev, data);
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001425
1426 qobject_decref(data);
1427}
1428
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001429static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1430{
1431 QObject *data;
1432
1433 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1434 bdrv_get_device_name(bs), ejected);
1435 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1436
1437 qobject_decref(data);
1438}
1439
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001440static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001441{
Markus Armbruster145feb12011-08-03 15:07:42 +02001442 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001443 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001444 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001445 if (tray_was_closed) {
1446 /* tray open */
1447 bdrv_emit_qmp_eject_event(bs, true);
1448 }
1449 if (load) {
1450 /* tray close */
1451 bdrv_emit_qmp_eject_event(bs, false);
1452 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001453 }
1454}
1455
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001456bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1457{
1458 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1459}
1460
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001461void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1462{
1463 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1464 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1465 }
1466}
1467
Markus Armbrustere4def802011-09-06 18:58:53 +02001468bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1469{
1470 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1471 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1472 }
1473 return false;
1474}
1475
Markus Armbruster145feb12011-08-03 15:07:42 +02001476static void bdrv_dev_resize_cb(BlockDriverState *bs)
1477{
1478 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1479 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001480 }
1481}
1482
Markus Armbrusterf1076392011-09-06 18:58:46 +02001483bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1484{
1485 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1486 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1487 }
1488 return false;
1489}
1490
aliguorie97fc192009-04-21 23:11:50 +00001491/*
1492 * Run consistency checks on an image
1493 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001494 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001495 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001496 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001497 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02001498int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00001499{
1500 if (bs->drv->bdrv_check == NULL) {
1501 return -ENOTSUP;
1502 }
1503
Kevin Wolfe076f332010-06-29 11:43:13 +02001504 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02001505 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00001506}
1507
Kevin Wolf8a426612010-07-16 17:17:01 +02001508#define COMMIT_BUF_SECTORS 2048
1509
bellard33e39632003-07-06 17:15:21 +00001510/* commit COW file into the raw image */
1511int bdrv_commit(BlockDriverState *bs)
1512{
bellard19cb3732006-08-19 11:45:59 +00001513 BlockDriver *drv = bs->drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001514 int64_t sector, total_sectors;
1515 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04001516 int ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001517 uint8_t *buf;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02001518 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00001519
bellard19cb3732006-08-19 11:45:59 +00001520 if (!drv)
1521 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001522
1523 if (!bs->backing_hd) {
1524 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001525 }
1526
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001527 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1528 return -EBUSY;
1529 }
1530
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001531 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02001532 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
1533 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001534 open_flags = bs->backing_hd->open_flags;
1535
1536 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04001537 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
1538 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001539 }
bellard33e39632003-07-06 17:15:21 +00001540 }
bellardea2384d2004-08-01 21:59:26 +00001541
Jan Kiszka6ea44302009-11-30 18:21:19 +01001542 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001543 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001544
Kevin Wolf8a426612010-07-16 17:17:01 +02001545 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001546 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001547
1548 if (bdrv_read(bs, sector, buf, n) != 0) {
1549 ret = -EIO;
1550 goto ro_cleanup;
1551 }
1552
1553 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1554 ret = -EIO;
1555 goto ro_cleanup;
1556 }
bellardea2384d2004-08-01 21:59:26 +00001557 }
1558 }
bellard95389c82005-12-18 18:28:15 +00001559
Christoph Hellwig1d449522010-01-17 12:32:30 +01001560 if (drv->bdrv_make_empty) {
1561 ret = drv->bdrv_make_empty(bs);
1562 bdrv_flush(bs);
1563 }
bellard95389c82005-12-18 18:28:15 +00001564
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001565 /*
1566 * Make sure all data we wrote to the backing device is actually
1567 * stable on disk.
1568 */
1569 if (bs->backing_hd)
1570 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001571
1572ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001573 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001574
1575 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04001576 /* ignoring error return here */
1577 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001578 }
1579
Christoph Hellwig1d449522010-01-17 12:32:30 +01001580 return ret;
bellard33e39632003-07-06 17:15:21 +00001581}
1582
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001583int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001584{
1585 BlockDriverState *bs;
1586
1587 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001588 int ret = bdrv_commit(bs);
1589 if (ret < 0) {
1590 return ret;
1591 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001592 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001593 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001594}
1595
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001596struct BdrvTrackedRequest {
1597 BlockDriverState *bs;
1598 int64_t sector_num;
1599 int nb_sectors;
1600 bool is_write;
1601 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001602 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001603 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001604};
1605
1606/**
1607 * Remove an active request from the tracked requests list
1608 *
1609 * This function should be called when a tracked request is completing.
1610 */
1611static void tracked_request_end(BdrvTrackedRequest *req)
1612{
1613 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001614 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001615}
1616
1617/**
1618 * Add an active request to the tracked requests list
1619 */
1620static void tracked_request_begin(BdrvTrackedRequest *req,
1621 BlockDriverState *bs,
1622 int64_t sector_num,
1623 int nb_sectors, bool is_write)
1624{
1625 *req = (BdrvTrackedRequest){
1626 .bs = bs,
1627 .sector_num = sector_num,
1628 .nb_sectors = nb_sectors,
1629 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001630 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001631 };
1632
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001633 qemu_co_queue_init(&req->wait_queue);
1634
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001635 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1636}
1637
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001638/**
1639 * Round a region to cluster boundaries
1640 */
1641static void round_to_clusters(BlockDriverState *bs,
1642 int64_t sector_num, int nb_sectors,
1643 int64_t *cluster_sector_num,
1644 int *cluster_nb_sectors)
1645{
1646 BlockDriverInfo bdi;
1647
1648 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1649 *cluster_sector_num = sector_num;
1650 *cluster_nb_sectors = nb_sectors;
1651 } else {
1652 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1653 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1654 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1655 nb_sectors, c);
1656 }
1657}
1658
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001659static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1660 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001661 /* aaaa bbbb */
1662 if (sector_num >= req->sector_num + req->nb_sectors) {
1663 return false;
1664 }
1665 /* bbbb aaaa */
1666 if (req->sector_num >= sector_num + nb_sectors) {
1667 return false;
1668 }
1669 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001670}
1671
1672static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1673 int64_t sector_num, int nb_sectors)
1674{
1675 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001676 int64_t cluster_sector_num;
1677 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001678 bool retry;
1679
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001680 /* If we touch the same cluster it counts as an overlap. This guarantees
1681 * that allocating writes will be serialized and not race with each other
1682 * for the same cluster. For example, in copy-on-read it ensures that the
1683 * CoR read and write operations are atomic and guest writes cannot
1684 * interleave between them.
1685 */
1686 round_to_clusters(bs, sector_num, nb_sectors,
1687 &cluster_sector_num, &cluster_nb_sectors);
1688
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001689 do {
1690 retry = false;
1691 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001692 if (tracked_request_overlaps(req, cluster_sector_num,
1693 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001694 /* Hitting this means there was a reentrant request, for
1695 * example, a block driver issuing nested requests. This must
1696 * never happen since it means deadlock.
1697 */
1698 assert(qemu_coroutine_self() != req->co);
1699
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001700 qemu_co_queue_wait(&req->wait_queue);
1701 retry = true;
1702 break;
1703 }
1704 }
1705 } while (retry);
1706}
1707
Kevin Wolf756e6732010-01-12 12:55:17 +01001708/*
1709 * Return values:
1710 * 0 - success
1711 * -EINVAL - backing format specified, but no file
1712 * -ENOSPC - can't update the backing file because no space is left in the
1713 * image file header
1714 * -ENOTSUP - format driver doesn't support changing the backing file
1715 */
1716int bdrv_change_backing_file(BlockDriverState *bs,
1717 const char *backing_file, const char *backing_fmt)
1718{
1719 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02001720 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001721
Paolo Bonzini5f377792012-04-12 14:01:01 +02001722 /* Backing file format doesn't make sense without a backing file */
1723 if (backing_fmt && !backing_file) {
1724 return -EINVAL;
1725 }
1726
Kevin Wolf756e6732010-01-12 12:55:17 +01001727 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001728 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01001729 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001730 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01001731 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02001732
1733 if (ret == 0) {
1734 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1735 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1736 }
1737 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001738}
1739
Jeff Cody6ebdcee2012-09-27 13:29:12 -04001740/*
1741 * Finds the image layer in the chain that has 'bs' as its backing file.
1742 *
1743 * active is the current topmost image.
1744 *
1745 * Returns NULL if bs is not found in active's image chain,
1746 * or if active == bs.
1747 */
1748BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
1749 BlockDriverState *bs)
1750{
1751 BlockDriverState *overlay = NULL;
1752 BlockDriverState *intermediate;
1753
1754 assert(active != NULL);
1755 assert(bs != NULL);
1756
1757 /* if bs is the same as active, then by definition it has no overlay
1758 */
1759 if (active == bs) {
1760 return NULL;
1761 }
1762
1763 intermediate = active;
1764 while (intermediate->backing_hd) {
1765 if (intermediate->backing_hd == bs) {
1766 overlay = intermediate;
1767 break;
1768 }
1769 intermediate = intermediate->backing_hd;
1770 }
1771
1772 return overlay;
1773}
1774
1775typedef struct BlkIntermediateStates {
1776 BlockDriverState *bs;
1777 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
1778} BlkIntermediateStates;
1779
1780
1781/*
1782 * Drops images above 'base' up to and including 'top', and sets the image
1783 * above 'top' to have base as its backing file.
1784 *
1785 * Requires that the overlay to 'top' is opened r/w, so that the backing file
1786 * information in 'bs' can be properly updated.
1787 *
1788 * E.g., this will convert the following chain:
1789 * bottom <- base <- intermediate <- top <- active
1790 *
1791 * to
1792 *
1793 * bottom <- base <- active
1794 *
1795 * It is allowed for bottom==base, in which case it converts:
1796 *
1797 * base <- intermediate <- top <- active
1798 *
1799 * to
1800 *
1801 * base <- active
1802 *
1803 * Error conditions:
1804 * if active == top, that is considered an error
1805 *
1806 */
1807int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
1808 BlockDriverState *base)
1809{
1810 BlockDriverState *intermediate;
1811 BlockDriverState *base_bs = NULL;
1812 BlockDriverState *new_top_bs = NULL;
1813 BlkIntermediateStates *intermediate_state, *next;
1814 int ret = -EIO;
1815
1816 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
1817 QSIMPLEQ_INIT(&states_to_delete);
1818
1819 if (!top->drv || !base->drv) {
1820 goto exit;
1821 }
1822
1823 new_top_bs = bdrv_find_overlay(active, top);
1824
1825 if (new_top_bs == NULL) {
1826 /* we could not find the image above 'top', this is an error */
1827 goto exit;
1828 }
1829
1830 /* special case of new_top_bs->backing_hd already pointing to base - nothing
1831 * to do, no intermediate images */
1832 if (new_top_bs->backing_hd == base) {
1833 ret = 0;
1834 goto exit;
1835 }
1836
1837 intermediate = top;
1838
1839 /* now we will go down through the list, and add each BDS we find
1840 * into our deletion queue, until we hit the 'base'
1841 */
1842 while (intermediate) {
1843 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
1844 intermediate_state->bs = intermediate;
1845 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
1846
1847 if (intermediate->backing_hd == base) {
1848 base_bs = intermediate->backing_hd;
1849 break;
1850 }
1851 intermediate = intermediate->backing_hd;
1852 }
1853 if (base_bs == NULL) {
1854 /* something went wrong, we did not end at the base. safely
1855 * unravel everything, and exit with error */
1856 goto exit;
1857 }
1858
1859 /* success - we can delete the intermediate states, and link top->base */
1860 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
1861 base_bs->drv ? base_bs->drv->format_name : "");
1862 if (ret) {
1863 goto exit;
1864 }
1865 new_top_bs->backing_hd = base_bs;
1866
1867
1868 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1869 /* so that bdrv_close() does not recursively close the chain */
1870 intermediate_state->bs->backing_hd = NULL;
1871 bdrv_delete(intermediate_state->bs);
1872 }
1873 ret = 0;
1874
1875exit:
1876 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1877 g_free(intermediate_state);
1878 }
1879 return ret;
1880}
1881
1882
aliguori71d07702009-03-03 17:37:16 +00001883static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1884 size_t size)
1885{
1886 int64_t len;
1887
1888 if (!bdrv_is_inserted(bs))
1889 return -ENOMEDIUM;
1890
1891 if (bs->growable)
1892 return 0;
1893
1894 len = bdrv_getlength(bs);
1895
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001896 if (offset < 0)
1897 return -EIO;
1898
1899 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001900 return -EIO;
1901
1902 return 0;
1903}
1904
1905static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1906 int nb_sectors)
1907{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001908 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1909 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001910}
1911
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001912typedef struct RwCo {
1913 BlockDriverState *bs;
1914 int64_t sector_num;
1915 int nb_sectors;
1916 QEMUIOVector *qiov;
1917 bool is_write;
1918 int ret;
1919} RwCo;
1920
1921static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1922{
1923 RwCo *rwco = opaque;
1924
1925 if (!rwco->is_write) {
1926 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001927 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001928 } else {
1929 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001930 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001931 }
1932}
1933
1934/*
1935 * Process a synchronous request using coroutines
1936 */
1937static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1938 int nb_sectors, bool is_write)
1939{
1940 QEMUIOVector qiov;
1941 struct iovec iov = {
1942 .iov_base = (void *)buf,
1943 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1944 };
1945 Coroutine *co;
1946 RwCo rwco = {
1947 .bs = bs,
1948 .sector_num = sector_num,
1949 .nb_sectors = nb_sectors,
1950 .qiov = &qiov,
1951 .is_write = is_write,
1952 .ret = NOT_DONE,
1953 };
1954
1955 qemu_iovec_init_external(&qiov, &iov, 1);
1956
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001957 /**
1958 * In sync call context, when the vcpu is blocked, this throttling timer
1959 * will not fire; so the I/O throttling function has to be disabled here
1960 * if it has been enabled.
1961 */
1962 if (bs->io_limits_enabled) {
1963 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1964 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1965 bdrv_io_limits_disable(bs);
1966 }
1967
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001968 if (qemu_in_coroutine()) {
1969 /* Fast-path if already in coroutine context */
1970 bdrv_rw_co_entry(&rwco);
1971 } else {
1972 co = qemu_coroutine_create(bdrv_rw_co_entry);
1973 qemu_coroutine_enter(co, &rwco);
1974 while (rwco.ret == NOT_DONE) {
1975 qemu_aio_wait();
1976 }
1977 }
1978 return rwco.ret;
1979}
1980
bellard19cb3732006-08-19 11:45:59 +00001981/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001982int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001983 uint8_t *buf, int nb_sectors)
1984{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001985 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001986}
1987
Markus Armbruster07d27a42012-06-29 17:34:29 +02001988/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
1989int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
1990 uint8_t *buf, int nb_sectors)
1991{
1992 bool enabled;
1993 int ret;
1994
1995 enabled = bs->io_limits_enabled;
1996 bs->io_limits_enabled = false;
1997 ret = bdrv_read(bs, 0, buf, 1);
1998 bs->io_limits_enabled = enabled;
1999 return ret;
2000}
2001
/* Number of bits in an unsigned long, assuming 8-bit bytes */
#define BITS_PER_LONG (8 * sizeof(unsigned long))
2003
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002004static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002005 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002006{
2007 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01002008 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002009
Jan Kiszka6ea44302009-11-30 18:21:19 +01002010 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01002011 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002012
2013 for (; start <= end; start++) {
Paolo Bonzini71df14f2012-04-12 14:01:04 +02002014 idx = start / BITS_PER_LONG;
2015 bit = start % BITS_PER_LONG;
Jan Kiszkac6d22832009-11-30 18:21:20 +01002016 val = bs->dirty_bitmap[idx];
2017 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002018 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02002019 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002020 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02002021 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01002022 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002023 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02002024 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002025 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02002026 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01002027 }
2028 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002029 }
2030}
2031
ths5fafdf22007-09-16 21:08:06 +00002032/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002033 -EIO generic I/O error (may happen for all errors)
2034 -ENOMEDIUM No media inserted.
2035 -EINVAL Invalid sector number or nb_sectors
2036 -EACCES Trying to write a read-only device
2037*/
ths5fafdf22007-09-16 21:08:06 +00002038int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002039 const uint8_t *buf, int nb_sectors)
2040{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002041 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00002042}
2043
aliguorieda578e2009-03-12 19:57:16 +00002044int bdrv_pread(BlockDriverState *bs, int64_t offset,
2045 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00002046{
Jan Kiszka6ea44302009-11-30 18:21:19 +01002047 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00002048 int len, nb_sectors, count;
2049 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002050 int ret;
bellard83f64092006-08-01 16:21:11 +00002051
2052 count = count1;
2053 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002054 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00002055 if (len > count)
2056 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002057 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002058 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002059 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2060 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002061 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00002062 count -= len;
2063 if (count == 0)
2064 return count1;
2065 sector_num++;
2066 buf += len;
2067 }
2068
2069 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002070 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002071 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002072 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
2073 return ret;
bellard83f64092006-08-01 16:21:11 +00002074 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002075 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002076 buf += len;
2077 count -= len;
2078 }
2079
2080 /* add data from the last sector */
2081 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002082 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2083 return ret;
bellard83f64092006-08-01 16:21:11 +00002084 memcpy(buf, tmp_buf, count);
2085 }
2086 return count1;
2087}
2088
aliguorieda578e2009-03-12 19:57:16 +00002089int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
2090 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00002091{
Jan Kiszka6ea44302009-11-30 18:21:19 +01002092 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00002093 int len, nb_sectors, count;
2094 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002095 int ret;
bellard83f64092006-08-01 16:21:11 +00002096
2097 count = count1;
2098 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002099 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00002100 if (len > count)
2101 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002102 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002103 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002104 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2105 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002106 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002107 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
2108 return ret;
bellard83f64092006-08-01 16:21:11 +00002109 count -= len;
2110 if (count == 0)
2111 return count1;
2112 sector_num++;
2113 buf += len;
2114 }
2115
2116 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002117 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002118 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002119 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
2120 return ret;
bellard83f64092006-08-01 16:21:11 +00002121 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002122 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002123 buf += len;
2124 count -= len;
2125 }
2126
2127 /* add data from the last sector */
2128 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002129 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2130 return ret;
bellard83f64092006-08-01 16:21:11 +00002131 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002132 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
2133 return ret;
bellard83f64092006-08-01 16:21:11 +00002134 }
2135 return count1;
2136}
bellard83f64092006-08-01 16:21:11 +00002137
Kevin Wolff08145f2010-06-16 16:38:15 +02002138/*
2139 * Writes to the file and ensures that no writes are reordered across this
2140 * request (acts as a barrier)
2141 *
2142 * Returns 0 on success, -errno in error cases.
2143 */
2144int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2145 const void *buf, int count)
2146{
2147 int ret;
2148
2149 ret = bdrv_pwrite(bs, offset, buf, count);
2150 if (ret < 0) {
2151 return ret;
2152 }
2153
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002154 /* No flush needed for cache modes that already do it */
2155 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002156 bdrv_flush(bs);
2157 }
2158
2159 return 0;
2160}
2161
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002162static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002163 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2164{
2165 /* Perform I/O through a temporary buffer so that users who scribble over
2166 * their read buffer while the operation is in progress do not end up
2167 * modifying the image file. This is critical for zero-copy guest I/O
2168 * where anything might happen inside guest memory.
2169 */
2170 void *bounce_buffer;
2171
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002172 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002173 struct iovec iov;
2174 QEMUIOVector bounce_qiov;
2175 int64_t cluster_sector_num;
2176 int cluster_nb_sectors;
2177 size_t skip_bytes;
2178 int ret;
2179
2180 /* Cover entire cluster so no additional backing file I/O is required when
2181 * allocating cluster in the image file.
2182 */
2183 round_to_clusters(bs, sector_num, nb_sectors,
2184 &cluster_sector_num, &cluster_nb_sectors);
2185
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002186 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2187 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002188
2189 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2190 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2191 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2192
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002193 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2194 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002195 if (ret < 0) {
2196 goto err;
2197 }
2198
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002199 if (drv->bdrv_co_write_zeroes &&
2200 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002201 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
2202 cluster_nb_sectors);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002203 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002204 /* This does not change the data on the disk, it is not necessary
2205 * to flush even in cache=writethrough mode.
2206 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002207 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002208 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002209 }
2210
Stefan Hajnocziab185922011-11-17 13:40:31 +00002211 if (ret < 0) {
2212 /* It might be okay to ignore write errors for guest requests. If this
2213 * is a deliberate copy-on-read then we don't want to ignore the error.
2214 * Simply report it in all cases.
2215 */
2216 goto err;
2217 }
2218
2219 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002220 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2221 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002222
2223err:
2224 qemu_vfree(bounce_buffer);
2225 return ret;
2226}
2227
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002228/*
2229 * Handle a read request in coroutine context
2230 */
2231static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002232 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2233 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002234{
2235 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002236 BdrvTrackedRequest req;
2237 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002238
Kevin Wolfda1fa912011-07-14 17:27:13 +02002239 if (!drv) {
2240 return -ENOMEDIUM;
2241 }
2242 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2243 return -EIO;
2244 }
2245
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002246 /* throttling disk read I/O */
2247 if (bs->io_limits_enabled) {
2248 bdrv_io_limits_intercept(bs, false, nb_sectors);
2249 }
2250
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002251 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002252 flags |= BDRV_REQ_COPY_ON_READ;
2253 }
2254 if (flags & BDRV_REQ_COPY_ON_READ) {
2255 bs->copy_on_read_in_flight++;
2256 }
2257
2258 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002259 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2260 }
2261
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002262 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002263
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002264 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002265 int pnum;
2266
2267 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
2268 if (ret < 0) {
2269 goto out;
2270 }
2271
2272 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002273 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002274 goto out;
2275 }
2276 }
2277
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002278 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002279
2280out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002281 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002282
2283 if (flags & BDRV_REQ_COPY_ON_READ) {
2284 bs->copy_on_read_in_flight--;
2285 }
2286
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002287 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002288}
2289
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002290int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02002291 int nb_sectors, QEMUIOVector *qiov)
2292{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002293 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02002294
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002295 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
2296}
2297
2298int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
2299 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2300{
2301 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
2302
2303 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
2304 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002305}
2306
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002307static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
2308 int64_t sector_num, int nb_sectors)
2309{
2310 BlockDriver *drv = bs->drv;
2311 QEMUIOVector qiov;
2312 struct iovec iov;
2313 int ret;
2314
Kevin Wolf621f0582012-03-20 15:12:58 +01002315 /* TODO Emulate only part of misaligned requests instead of letting block
2316 * drivers return -ENOTSUP and emulate everything */
2317
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002318 /* First try the efficient write zeroes operation */
2319 if (drv->bdrv_co_write_zeroes) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002320 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
2321 if (ret != -ENOTSUP) {
2322 return ret;
2323 }
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002324 }
2325
2326 /* Fall back to bounce buffer if write zeroes is unsupported */
2327 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2328 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
2329 memset(iov.iov_base, 0, iov.iov_len);
2330 qemu_iovec_init_external(&qiov, &iov, 1);
2331
2332 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
2333
2334 qemu_vfree(iov.iov_base);
2335 return ret;
2336}
2337
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002338/*
2339 * Handle a write request in coroutine context
2340 */
2341static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002342 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2343 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002344{
2345 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002346 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002347 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002348
2349 if (!bs->drv) {
2350 return -ENOMEDIUM;
2351 }
2352 if (bs->read_only) {
2353 return -EACCES;
2354 }
2355 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2356 return -EIO;
2357 }
2358
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002359 /* throttling disk write I/O */
2360 if (bs->io_limits_enabled) {
2361 bdrv_io_limits_intercept(bs, true, nb_sectors);
2362 }
2363
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002364 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002365 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2366 }
2367
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002368 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
2369
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002370 if (flags & BDRV_REQ_ZERO_WRITE) {
2371 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
2372 } else {
2373 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
2374 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002375
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002376 if (ret == 0 && !bs->enable_write_cache) {
2377 ret = bdrv_co_flush(bs);
2378 }
2379
Kevin Wolfda1fa912011-07-14 17:27:13 +02002380 if (bs->dirty_bitmap) {
2381 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2382 }
2383
2384 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2385 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2386 }
2387
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002388 tracked_request_end(&req);
2389
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002390 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002391}
2392
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002393int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
2394 int nb_sectors, QEMUIOVector *qiov)
2395{
2396 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
2397
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002398 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
2399}
2400
2401int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
2402 int64_t sector_num, int nb_sectors)
2403{
2404 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
2405
2406 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
2407 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002408}
2409
bellard83f64092006-08-01 16:21:11 +00002410/**
bellard83f64092006-08-01 16:21:11 +00002411 * Truncate file to 'offset' bytes (needed only for file protocols)
2412 */
2413int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2414{
2415 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002416 int ret;
bellard83f64092006-08-01 16:21:11 +00002417 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002418 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00002419 if (!drv->bdrv_truncate)
2420 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02002421 if (bs->read_only)
2422 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02002423 if (bdrv_in_use(bs))
2424 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002425 ret = drv->bdrv_truncate(bs, offset);
2426 if (ret == 0) {
2427 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02002428 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002429 }
2430 return ret;
bellard83f64092006-08-01 16:21:11 +00002431}
2432
2433/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08002434 * Length of a allocated file in bytes. Sparse files are counted by actual
2435 * allocated space. Return < 0 if error or unknown.
2436 */
2437int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2438{
2439 BlockDriver *drv = bs->drv;
2440 if (!drv) {
2441 return -ENOMEDIUM;
2442 }
2443 if (drv->bdrv_get_allocated_file_size) {
2444 return drv->bdrv_get_allocated_file_size(bs);
2445 }
2446 if (bs->file) {
2447 return bdrv_get_allocated_file_size(bs->file);
2448 }
2449 return -ENOTSUP;
2450}
2451
2452/**
bellard83f64092006-08-01 16:21:11 +00002453 * Length of a file in bytes. Return < 0 if error or unknown.
2454 */
2455int64_t bdrv_getlength(BlockDriverState *bs)
2456{
2457 BlockDriver *drv = bs->drv;
2458 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002459 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002460
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002461 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002462 if (drv->bdrv_getlength) {
2463 return drv->bdrv_getlength(bs);
2464 }
bellard83f64092006-08-01 16:21:11 +00002465 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002466 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00002467}
2468
bellard19cb3732006-08-19 11:45:59 +00002469/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00002470void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00002471{
bellard19cb3732006-08-19 11:45:59 +00002472 int64_t length;
2473 length = bdrv_getlength(bs);
2474 if (length < 0)
2475 length = 0;
2476 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01002477 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00002478 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00002479}
bellardcf989512004-02-16 21:56:36 +00002480
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002481/* throttling disk io limits */
2482void bdrv_set_io_limits(BlockDriverState *bs,
2483 BlockIOLimit *io_limits)
2484{
2485 bs->io_limits = *io_limits;
2486 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2487}
2488
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02002489void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2490 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002491{
2492 bs->on_read_error = on_read_error;
2493 bs->on_write_error = on_write_error;
2494}
2495
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02002496BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002497{
2498 return is_read ? bs->on_read_error : bs->on_write_error;
2499}
2500
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02002501BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2502{
2503 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2504
2505 switch (on_err) {
2506 case BLOCKDEV_ON_ERROR_ENOSPC:
2507 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
2508 case BLOCKDEV_ON_ERROR_STOP:
2509 return BDRV_ACTION_STOP;
2510 case BLOCKDEV_ON_ERROR_REPORT:
2511 return BDRV_ACTION_REPORT;
2512 case BLOCKDEV_ON_ERROR_IGNORE:
2513 return BDRV_ACTION_IGNORE;
2514 default:
2515 abort();
2516 }
2517}
2518
2519/* This is done by device models because, while the block layer knows
2520 * about the error, it does not know whether an operation comes from
2521 * the device or the block layer (from a job, for example).
2522 */
2523void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2524 bool is_read, int error)
2525{
2526 assert(error >= 0);
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002527 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02002528 if (action == BDRV_ACTION_STOP) {
2529 vm_stop(RUN_STATE_IO_ERROR);
2530 bdrv_iostatus_set_err(bs, error);
2531 }
2532}
2533
bellardb3380822004-03-14 21:38:54 +00002534int bdrv_is_read_only(BlockDriverState *bs)
2535{
2536 return bs->read_only;
2537}
2538
ths985a03b2007-12-24 16:10:43 +00002539int bdrv_is_sg(BlockDriverState *bs)
2540{
2541 return bs->sg;
2542}
2543
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002544int bdrv_enable_write_cache(BlockDriverState *bs)
2545{
2546 return bs->enable_write_cache;
2547}
2548
Paolo Bonzini425b0142012-06-06 00:04:52 +02002549void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2550{
2551 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04002552
2553 /* so a reopen() will preserve wce */
2554 if (wce) {
2555 bs->open_flags |= BDRV_O_CACHE_WB;
2556 } else {
2557 bs->open_flags &= ~BDRV_O_CACHE_WB;
2558 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02002559}
2560
bellardea2384d2004-08-01 21:59:26 +00002561int bdrv_is_encrypted(BlockDriverState *bs)
2562{
2563 if (bs->backing_hd && bs->backing_hd->encrypted)
2564 return 1;
2565 return bs->encrypted;
2566}
2567
aliguoric0f4ce72009-03-05 23:01:01 +00002568int bdrv_key_required(BlockDriverState *bs)
2569{
2570 BlockDriverState *backing_hd = bs->backing_hd;
2571
2572 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2573 return 1;
2574 return (bs->encrypted && !bs->valid_key);
2575}
2576
bellardea2384d2004-08-01 21:59:26 +00002577int bdrv_set_key(BlockDriverState *bs, const char *key)
2578{
2579 int ret;
2580 if (bs->backing_hd && bs->backing_hd->encrypted) {
2581 ret = bdrv_set_key(bs->backing_hd, key);
2582 if (ret < 0)
2583 return ret;
2584 if (!bs->encrypted)
2585 return 0;
2586 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002587 if (!bs->encrypted) {
2588 return -EINVAL;
2589 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2590 return -ENOMEDIUM;
2591 }
aliguoric0f4ce72009-03-05 23:01:01 +00002592 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002593 if (ret < 0) {
2594 bs->valid_key = 0;
2595 } else if (!bs->valid_key) {
2596 bs->valid_key = 1;
2597 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002598 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002599 }
aliguoric0f4ce72009-03-05 23:01:01 +00002600 return ret;
bellardea2384d2004-08-01 21:59:26 +00002601}
2602
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02002603const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00002604{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02002605 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00002606}
2607
ths5fafdf22007-09-16 21:08:06 +00002608void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002609 void *opaque)
2610{
2611 BlockDriver *drv;
2612
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002613 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002614 it(opaque, drv->format_name);
2615 }
2616}
2617
bellardb3380822004-03-14 21:38:54 +00002618BlockDriverState *bdrv_find(const char *name)
2619{
2620 BlockDriverState *bs;
2621
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002622 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2623 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002624 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002625 }
bellardb3380822004-03-14 21:38:54 +00002626 }
2627 return NULL;
2628}
2629
Markus Armbruster2f399b02010-06-02 18:55:20 +02002630BlockDriverState *bdrv_next(BlockDriverState *bs)
2631{
2632 if (!bs) {
2633 return QTAILQ_FIRST(&bdrv_states);
2634 }
2635 return QTAILQ_NEXT(bs, list);
2636}
2637
aliguori51de9762009-03-05 23:00:43 +00002638void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002639{
2640 BlockDriverState *bs;
2641
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002642 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002643 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002644 }
2645}
2646
bellardea2384d2004-08-01 21:59:26 +00002647const char *bdrv_get_device_name(BlockDriverState *bs)
2648{
2649 return bs->device_name;
2650}
2651
Markus Armbrusterc8433282012-06-05 16:49:24 +02002652int bdrv_get_flags(BlockDriverState *bs)
2653{
2654 return bs->open_flags;
2655}
2656
aliguoric6ca28d2008-10-06 13:55:43 +00002657void bdrv_flush_all(void)
2658{
2659 BlockDriverState *bs;
2660
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002661 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002662 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002663 }
aliguoric6ca28d2008-10-06 13:55:43 +00002664}
2665
Kevin Wolff2feebb2010-04-14 17:30:35 +02002666int bdrv_has_zero_init(BlockDriverState *bs)
2667{
2668 assert(bs->drv);
2669
Kevin Wolf336c1c12010-07-28 11:26:29 +02002670 if (bs->drv->bdrv_has_zero_init) {
2671 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002672 }
2673
2674 return 1;
2675}
2676
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002677typedef struct BdrvCoIsAllocatedData {
2678 BlockDriverState *bs;
2679 int64_t sector_num;
2680 int nb_sectors;
2681 int *pnum;
2682 int ret;
2683 bool done;
2684} BdrvCoIsAllocatedData;
2685
thsf58c7b32008-06-05 21:53:49 +00002686/*
2687 * Returns true iff the specified sector is present in the disk image. Drivers
2688 * not implementing the functionality are assumed to not support backing files,
2689 * hence all their sectors are reported as allocated.
2690 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002691 * If 'sector_num' is beyond the end of the disk image the return value is 0
2692 * and 'pnum' is set to 0.
2693 *
thsf58c7b32008-06-05 21:53:49 +00002694 * 'pnum' is set to the number of sectors (including and immediately following
2695 * the specified sector) that are known to be in the same
2696 * allocated/unallocated state.
2697 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002698 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2699 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002700 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002701int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2702 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002703{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002704 int64_t n;
2705
2706 if (sector_num >= bs->total_sectors) {
2707 *pnum = 0;
2708 return 0;
2709 }
2710
2711 n = bs->total_sectors - sector_num;
2712 if (n < nb_sectors) {
2713 nb_sectors = n;
2714 }
2715
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002716 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002717 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002718 return 1;
2719 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002720
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002721 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2722}
2723
2724/* Coroutine wrapper for bdrv_is_allocated() */
2725static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2726{
2727 BdrvCoIsAllocatedData *data = opaque;
2728 BlockDriverState *bs = data->bs;
2729
2730 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2731 data->pnum);
2732 data->done = true;
2733}
2734
2735/*
2736 * Synchronous wrapper around bdrv_co_is_allocated().
2737 *
2738 * See bdrv_co_is_allocated() for details.
2739 */
2740int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2741 int *pnum)
2742{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002743 Coroutine *co;
2744 BdrvCoIsAllocatedData data = {
2745 .bs = bs,
2746 .sector_num = sector_num,
2747 .nb_sectors = nb_sectors,
2748 .pnum = pnum,
2749 .done = false,
2750 };
2751
2752 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2753 qemu_coroutine_enter(co, &data);
2754 while (!data.done) {
2755 qemu_aio_wait();
2756 }
2757 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002758}
2759
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02002760/*
2761 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2762 *
2763 * Return true if the given sector is allocated in any image between
2764 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2765 * sector is allocated in any image of the chain. Return false otherwise.
2766 *
2767 * 'pnum' is set to the number of sectors (including and immediately following
2768 * the specified sector) that are known to be in the same
2769 * allocated/unallocated state.
2770 *
2771 */
2772int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2773 BlockDriverState *base,
2774 int64_t sector_num,
2775 int nb_sectors, int *pnum)
2776{
2777 BlockDriverState *intermediate;
2778 int ret, n = nb_sectors;
2779
2780 intermediate = top;
2781 while (intermediate && intermediate != base) {
2782 int pnum_inter;
2783 ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
2784 &pnum_inter);
2785 if (ret < 0) {
2786 return ret;
2787 } else if (ret) {
2788 *pnum = pnum_inter;
2789 return 1;
2790 }
2791
2792 /*
2793 * [sector_num, nb_sectors] is unallocated on top but intermediate
2794 * might have
2795 *
2796 * [sector_num+x, nr_sectors] allocated.
2797 */
2798 if (n > pnum_inter) {
2799 n = pnum_inter;
2800 }
2801
2802 intermediate = intermediate->backing_hd;
2803 }
2804
2805 *pnum = n;
2806 return 0;
2807}
2808
Luiz Capitulinob2023812011-09-21 17:16:47 -03002809BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002810{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002811 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002812 BlockDriverState *bs;
2813
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002814 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002815 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002816
Luiz Capitulinob2023812011-09-21 17:16:47 -03002817 info->value = g_malloc0(sizeof(*info->value));
2818 info->value->device = g_strdup(bs->device_name);
2819 info->value->type = g_strdup("unknown");
2820 info->value->locked = bdrv_dev_is_medium_locked(bs);
2821 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002822
Markus Armbrustere4def802011-09-06 18:58:53 +02002823 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002824 info->value->has_tray_open = true;
2825 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002826 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002827
2828 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002829 info->value->has_io_status = true;
2830 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002831 }
2832
bellard19cb3732006-08-19 11:45:59 +00002833 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002834 info->value->has_inserted = true;
2835 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2836 info->value->inserted->file = g_strdup(bs->filename);
2837 info->value->inserted->ro = bs->read_only;
2838 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2839 info->value->inserted->encrypted = bs->encrypted;
Luiz Capitulinoc75a1a82012-07-26 20:28:44 -03002840 info->value->inserted->encryption_key_missing = bdrv_key_required(bs);
Luiz Capitulinob2023812011-09-21 17:16:47 -03002841 if (bs->backing_file[0]) {
2842 info->value->inserted->has_backing_file = true;
2843 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002844 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002845
Benoît Canet2e3e3312012-08-02 10:22:48 +02002846 info->value->inserted->backing_file_depth =
2847 bdrv_get_backing_file_depth(bs);
2848
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002849 if (bs->io_limits_enabled) {
2850 info->value->inserted->bps =
2851 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2852 info->value->inserted->bps_rd =
2853 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2854 info->value->inserted->bps_wr =
2855 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2856 info->value->inserted->iops =
2857 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2858 info->value->inserted->iops_rd =
2859 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2860 info->value->inserted->iops_wr =
2861 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2862 }
bellardb3380822004-03-14 21:38:54 +00002863 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002864
2865 /* XXX: waiting for the qapi to support GSList */
2866 if (!cur_item) {
2867 head = cur_item = info;
2868 } else {
2869 cur_item->next = info;
2870 cur_item = info;
2871 }
bellardb3380822004-03-14 21:38:54 +00002872 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002873
Luiz Capitulinob2023812011-09-21 17:16:47 -03002874 return head;
bellardb3380822004-03-14 21:38:54 +00002875}
thsa36e69d2007-12-02 05:18:19 +00002876
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002877/* Consider exposing this as a full fledged QMP command */
2878static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002879{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002880 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002881
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002882 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002883
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002884 if (bs->device_name[0]) {
2885 s->has_device = true;
2886 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002887 }
2888
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002889 s->stats = g_malloc0(sizeof(*s->stats));
2890 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2891 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2892 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2893 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2894 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2895 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2896 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2897 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2898 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2899
Kevin Wolf294cc352010-04-28 14:34:01 +02002900 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002901 s->has_parent = true;
2902 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002903 }
2904
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002905 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002906}
2907
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002908BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002909{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002910 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002911 BlockDriverState *bs;
2912
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002913 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002914 BlockStatsList *info = g_malloc0(sizeof(*info));
2915 info->value = qmp_query_blockstat(bs, NULL);
2916
2917 /* XXX: waiting for the qapi to support GSList */
2918 if (!cur_item) {
2919 head = cur_item = info;
2920 } else {
2921 cur_item->next = info;
2922 cur_item = info;
2923 }
thsa36e69d2007-12-02 05:18:19 +00002924 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002925
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002926 return head;
thsa36e69d2007-12-02 05:18:19 +00002927}
bellardea2384d2004-08-01 21:59:26 +00002928
aliguori045df332009-03-05 23:00:48 +00002929const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2930{
2931 if (bs->backing_hd && bs->backing_hd->encrypted)
2932 return bs->backing_file;
2933 else if (bs->encrypted)
2934 return bs->filename;
2935 else
2936 return NULL;
2937}
2938
ths5fafdf22007-09-16 21:08:06 +00002939void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002940 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002941{
Kevin Wolf3574c602011-10-26 11:02:11 +02002942 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002943}
2944
ths5fafdf22007-09-16 21:08:06 +00002945int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002946 const uint8_t *buf, int nb_sectors)
2947{
2948 BlockDriver *drv = bs->drv;
2949 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002950 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002951 if (!drv->bdrv_write_compressed)
2952 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002953 if (bdrv_check_request(bs, sector_num, nb_sectors))
2954 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002955
Jan Kiszkac6d22832009-11-30 18:21:20 +01002956 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002957 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2958 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002959
bellardfaea38e2006-08-05 21:31:00 +00002960 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2961}
ths3b46e622007-09-17 08:09:54 +00002962
bellardfaea38e2006-08-05 21:31:00 +00002963int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2964{
2965 BlockDriver *drv = bs->drv;
2966 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002967 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002968 if (!drv->bdrv_get_info)
2969 return -ENOTSUP;
2970 memset(bdi, 0, sizeof(*bdi));
2971 return drv->bdrv_get_info(bs, bdi);
2972}
2973
Christoph Hellwig45566e92009-07-10 23:11:57 +02002974int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2975 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002976{
2977 BlockDriver *drv = bs->drv;
2978 if (!drv)
2979 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002980 if (drv->bdrv_save_vmstate)
2981 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2982 if (bs->file)
2983 return bdrv_save_vmstate(bs->file, buf, pos, size);
2984 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002985}
2986
Christoph Hellwig45566e92009-07-10 23:11:57 +02002987int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2988 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002989{
2990 BlockDriver *drv = bs->drv;
2991 if (!drv)
2992 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002993 if (drv->bdrv_load_vmstate)
2994 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2995 if (bs->file)
2996 return bdrv_load_vmstate(bs->file, buf, pos, size);
2997 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002998}
2999
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01003000void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
3001{
3002 BlockDriver *drv = bs->drv;
3003
3004 if (!drv || !drv->bdrv_debug_event) {
3005 return;
3006 }
3007
Blue Swirl0ed8b6f2012-07-08 06:56:53 +00003008 drv->bdrv_debug_event(bs, event);
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01003009
3010}
3011
/**************************************************************/
/* handling of snapshots */
3014
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03003015int bdrv_can_snapshot(BlockDriverState *bs)
3016{
3017 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02003018 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03003019 return 0;
3020 }
3021
3022 if (!drv->bdrv_snapshot_create) {
3023 if (bs->file != NULL) {
3024 return bdrv_can_snapshot(bs->file);
3025 }
3026 return 0;
3027 }
3028
3029 return 1;
3030}
3031
Blue Swirl199630b2010-07-25 20:49:34 +00003032int bdrv_is_snapshot(BlockDriverState *bs)
3033{
3034 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3035}
3036
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003037BlockDriverState *bdrv_snapshots(void)
3038{
3039 BlockDriverState *bs;
3040
Markus Armbruster3ac906f2010-07-01 09:30:38 +02003041 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003042 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02003043 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003044
3045 bs = NULL;
3046 while ((bs = bdrv_next(bs))) {
3047 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02003048 bs_snapshots = bs;
3049 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003050 }
3051 }
3052 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003053}
3054
ths5fafdf22007-09-16 21:08:06 +00003055int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003056 QEMUSnapshotInfo *sn_info)
3057{
3058 BlockDriver *drv = bs->drv;
3059 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003060 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003061 if (drv->bdrv_snapshot_create)
3062 return drv->bdrv_snapshot_create(bs, sn_info);
3063 if (bs->file)
3064 return bdrv_snapshot_create(bs->file, sn_info);
3065 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003066}
3067
ths5fafdf22007-09-16 21:08:06 +00003068int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003069 const char *snapshot_id)
3070{
3071 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003072 int ret, open_ret;
3073
bellardfaea38e2006-08-05 21:31:00 +00003074 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003075 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003076 if (drv->bdrv_snapshot_goto)
3077 return drv->bdrv_snapshot_goto(bs, snapshot_id);
3078
3079 if (bs->file) {
3080 drv->bdrv_close(bs);
3081 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
3082 open_ret = drv->bdrv_open(bs, bs->open_flags);
3083 if (open_ret < 0) {
3084 bdrv_delete(bs->file);
3085 bs->drv = NULL;
3086 return open_ret;
3087 }
3088 return ret;
3089 }
3090
3091 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003092}
3093
3094int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
3095{
3096 BlockDriver *drv = bs->drv;
3097 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003098 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003099 if (drv->bdrv_snapshot_delete)
3100 return drv->bdrv_snapshot_delete(bs, snapshot_id);
3101 if (bs->file)
3102 return bdrv_snapshot_delete(bs->file, snapshot_id);
3103 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003104}
3105
ths5fafdf22007-09-16 21:08:06 +00003106int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003107 QEMUSnapshotInfo **psn_info)
3108{
3109 BlockDriver *drv = bs->drv;
3110 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003111 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003112 if (drv->bdrv_snapshot_list)
3113 return drv->bdrv_snapshot_list(bs, psn_info);
3114 if (bs->file)
3115 return bdrv_snapshot_list(bs->file, psn_info);
3116 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003117}
3118
edison51ef6722010-09-21 19:58:41 -07003119int bdrv_snapshot_load_tmp(BlockDriverState *bs,
3120 const char *snapshot_name)
3121{
3122 BlockDriver *drv = bs->drv;
3123 if (!drv) {
3124 return -ENOMEDIUM;
3125 }
3126 if (!bs->read_only) {
3127 return -EINVAL;
3128 }
3129 if (drv->bdrv_snapshot_load_tmp) {
3130 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
3131 }
3132 return -ENOTSUP;
3133}
3134
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00003135BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3136 const char *backing_file)
3137{
3138 if (!bs->drv) {
3139 return NULL;
3140 }
3141
3142 if (bs->backing_hd) {
3143 if (strcmp(bs->backing_file, backing_file) == 0) {
3144 return bs->backing_hd;
3145 } else {
3146 return bdrv_find_backing_image(bs->backing_hd, backing_file);
3147 }
3148 }
3149
3150 return NULL;
3151}
3152
Benoît Canetf198fd12012-08-02 10:22:47 +02003153int bdrv_get_backing_file_depth(BlockDriverState *bs)
3154{
3155 if (!bs->drv) {
3156 return 0;
3157 }
3158
3159 if (!bs->backing_hd) {
3160 return 0;
3161 }
3162
3163 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3164}
3165
Jeff Cody79fac562012-09-27 13:29:15 -04003166BlockDriverState *bdrv_find_base(BlockDriverState *bs)
3167{
3168 BlockDriverState *curr_bs = NULL;
3169
3170 if (!bs) {
3171 return NULL;
3172 }
3173
3174 curr_bs = bs;
3175
3176 while (curr_bs->backing_hd) {
3177 curr_bs = curr_bs->backing_hd;
3178 }
3179 return curr_bs;
3180}
3181
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a compact human
 * readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and given a K/M/G/T suffix: one decimal when the scaled
 * value is below 10, a rounded integer otherwise.  Values too large for the
 * last suffix are still printed with it.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to nearest by adding half a unit before dividing */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
3211
3212char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
3213{
3214 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00003215#ifdef _WIN32
3216 struct tm *ptm;
3217#else
bellardfaea38e2006-08-05 21:31:00 +00003218 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00003219#endif
bellardfaea38e2006-08-05 21:31:00 +00003220 time_t ti;
3221 int64_t secs;
3222
3223 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00003224 snprintf(buf, buf_size,
3225 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00003226 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3227 } else {
3228 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00003229#ifdef _WIN32
3230 ptm = localtime(&ti);
3231 strftime(date_buf, sizeof(date_buf),
3232 "%Y-%m-%d %H:%M:%S", ptm);
3233#else
bellardfaea38e2006-08-05 21:31:00 +00003234 localtime_r(&ti, &tm);
3235 strftime(date_buf, sizeof(date_buf),
3236 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00003237#endif
bellardfaea38e2006-08-05 21:31:00 +00003238 secs = sn->vm_clock_nsec / 1000000000;
3239 snprintf(clock_buf, sizeof(clock_buf),
3240 "%02d:%02d:%02d.%03d",
3241 (int)(secs / 3600),
3242 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00003243 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00003244 (int)((sn->vm_clock_nsec / 1000000) % 1000));
3245 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00003246 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00003247 sn->id_str, sn->name,
3248 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
3249 date_buf,
3250 clock_buf);
3251 }
3252 return buf;
3253}
3254
/**************************************************************/
/* async I/Os */
3257
aliguori3b69e4b2009-01-22 16:59:24 +00003258BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00003259 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00003260 BlockDriverCompletionFunc *cb, void *opaque)
3261{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01003262 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3263
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003264 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003265 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00003266}
3267
aliguorif141eaf2009-04-07 18:43:24 +00003268BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3269 QEMUIOVector *qiov, int nb_sectors,
3270 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003271{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01003272 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3273
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01003274 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003275 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00003276}
3277
Kevin Wolf40b4f532009-09-09 17:53:37 +02003278
3279typedef struct MultiwriteCB {
3280 int error;
3281 int num_requests;
3282 int num_callbacks;
3283 struct {
3284 BlockDriverCompletionFunc *cb;
3285 void *opaque;
3286 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003287 } callbacks[];
3288} MultiwriteCB;
3289
3290static void multiwrite_user_cb(MultiwriteCB *mcb)
3291{
3292 int i;
3293
3294 for (i = 0; i < mcb->num_callbacks; i++) {
3295 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01003296 if (mcb->callbacks[i].free_qiov) {
3297 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3298 }
Anthony Liguori7267c092011-08-20 22:09:37 -05003299 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003300 }
3301}
3302
3303static void multiwrite_cb(void *opaque, int ret)
3304{
3305 MultiwriteCB *mcb = opaque;
3306
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003307 trace_multiwrite_cb(mcb, ret);
3308
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02003309 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02003310 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003311 }
3312
3313 mcb->num_requests--;
3314 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02003315 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05003316 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003317 }
3318}
3319
3320static int multiwrite_req_compare(const void *a, const void *b)
3321{
Christoph Hellwig77be4362010-05-19 20:53:10 +02003322 const BlockRequest *req1 = a, *req2 = b;
3323
3324 /*
3325 * Note that we can't simply subtract req2->sector from req1->sector
3326 * here as that could overflow the return value.
3327 */
3328 if (req1->sector > req2->sector) {
3329 return 1;
3330 } else if (req1->sector < req2->sector) {
3331 return -1;
3332 } else {
3333 return 0;
3334 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02003335}
3336
3337/*
3338 * Takes a bunch of requests and tries to merge them. Returns the number of
3339 * requests that remain after merging.
3340 */
3341static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3342 int num_reqs, MultiwriteCB *mcb)
3343{
3344 int i, outidx;
3345
3346 // Sort requests by start sector
3347 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3348
3349 // Check if adjacent requests touch the same clusters. If so, combine them,
3350 // filling up gaps with zero sectors.
3351 outidx = 0;
3352 for (i = 1; i < num_reqs; i++) {
3353 int merge = 0;
3354 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3355
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003356 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02003357 if (reqs[i].sector <= oldreq_last) {
3358 merge = 1;
3359 }
3360
Christoph Hellwige2a305f2010-01-26 14:49:08 +01003361 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3362 merge = 0;
3363 }
3364
Kevin Wolf40b4f532009-09-09 17:53:37 +02003365 if (merge) {
3366 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05003367 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003368 qemu_iovec_init(qiov,
3369 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3370
3371 // Add the first request to the merged one. If the requests are
3372 // overlapping, drop the last sectors of the first request.
3373 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04003374 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003375
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003376 // We should need to add any zeros between the two requests
3377 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003378
3379 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04003380 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003381
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003382 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003383 reqs[outidx].qiov = qiov;
3384
3385 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3386 } else {
3387 outidx++;
3388 reqs[outidx].sector = reqs[i].sector;
3389 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3390 reqs[outidx].qiov = reqs[i].qiov;
3391 }
3392 }
3393
3394 return outidx + 1;
3395}
3396
3397/*
3398 * Submit multiple AIO write requests at once.
3399 *
3400 * On success, the function returns 0 and all requests in the reqs array have
3401 * been submitted. In error case this function returns -1, and any of the
3402 * requests may or may not be submitted yet. In particular, this means that the
3403 * callback will be called for some of the requests, for others it won't. The
3404 * caller must check the error field of the BlockRequest to wait for the right
3405 * callbacks (if error != 0, no callback will be called).
3406 *
3407 * The implementation may modify the contents of the reqs array, e.g. to merge
3408 * requests. However, the fields opaque and error are left unmodified as they
3409 * are used to signal failure for a single request to the caller.
3410 */
3411int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3412{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003413 MultiwriteCB *mcb;
3414 int i;
3415
Ryan Harper301db7c2011-03-07 10:01:04 -06003416 /* don't submit writes if we don't have a medium */
3417 if (bs->drv == NULL) {
3418 for (i = 0; i < num_reqs; i++) {
3419 reqs[i].error = -ENOMEDIUM;
3420 }
3421 return -1;
3422 }
3423
Kevin Wolf40b4f532009-09-09 17:53:37 +02003424 if (num_reqs == 0) {
3425 return 0;
3426 }
3427
3428 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003429 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003430 mcb->num_requests = 0;
3431 mcb->num_callbacks = num_reqs;
3432
3433 for (i = 0; i < num_reqs; i++) {
3434 mcb->callbacks[i].cb = reqs[i].cb;
3435 mcb->callbacks[i].opaque = reqs[i].opaque;
3436 }
3437
3438 // Check for mergable requests
3439 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3440
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003441 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3442
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003443 /* Run the aio requests. */
3444 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003445 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003446 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003447 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003448 }
3449
3450 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003451}
3452
bellard83f64092006-08-01 16:21:11 +00003453void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003454{
aliguori6bbff9a2009-03-20 18:25:59 +00003455 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003456}
3457
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003458/* block I/O throttling */
3459static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3460 bool is_write, double elapsed_time, uint64_t *wait)
3461{
3462 uint64_t bps_limit = 0;
3463 double bytes_limit, bytes_base, bytes_res;
3464 double slice_time, wait_time;
3465
3466 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3467 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3468 } else if (bs->io_limits.bps[is_write]) {
3469 bps_limit = bs->io_limits.bps[is_write];
3470 } else {
3471 if (wait) {
3472 *wait = 0;
3473 }
3474
3475 return false;
3476 }
3477
3478 slice_time = bs->slice_end - bs->slice_start;
3479 slice_time /= (NANOSECONDS_PER_SECOND);
3480 bytes_limit = bps_limit * slice_time;
3481 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3482 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3483 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3484 }
3485
3486 /* bytes_base: the bytes of data which have been read/written; and
3487 * it is obtained from the history statistic info.
3488 * bytes_res: the remaining bytes of data which need to be read/written.
3489 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3490 * the total time for completing reading/writting all data.
3491 */
3492 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3493
3494 if (bytes_base + bytes_res <= bytes_limit) {
3495 if (wait) {
3496 *wait = 0;
3497 }
3498
3499 return false;
3500 }
3501
3502 /* Calc approx time to dispatch */
3503 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3504
3505 /* When the I/O rate at runtime exceeds the limits,
3506 * bs->slice_end need to be extended in order that the current statistic
3507 * info can be kept until the timer fire, so it is increased and tuned
3508 * based on the result of experiment.
3509 */
3510 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3511 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3512 if (wait) {
3513 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3514 }
3515
3516 return true;
3517}
3518
3519static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3520 double elapsed_time, uint64_t *wait)
3521{
3522 uint64_t iops_limit = 0;
3523 double ios_limit, ios_base;
3524 double slice_time, wait_time;
3525
3526 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3527 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3528 } else if (bs->io_limits.iops[is_write]) {
3529 iops_limit = bs->io_limits.iops[is_write];
3530 } else {
3531 if (wait) {
3532 *wait = 0;
3533 }
3534
3535 return false;
3536 }
3537
3538 slice_time = bs->slice_end - bs->slice_start;
3539 slice_time /= (NANOSECONDS_PER_SECOND);
3540 ios_limit = iops_limit * slice_time;
3541 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3542 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3543 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3544 }
3545
3546 if (ios_base + 1 <= ios_limit) {
3547 if (wait) {
3548 *wait = 0;
3549 }
3550
3551 return false;
3552 }
3553
3554 /* Calc approx time to dispatch */
3555 wait_time = (ios_base + 1) / iops_limit;
3556 if (wait_time > elapsed_time) {
3557 wait_time = wait_time - elapsed_time;
3558 } else {
3559 wait_time = 0;
3560 }
3561
3562 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3563 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3564 if (wait) {
3565 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3566 }
3567
3568 return true;
3569}
3570
3571static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3572 bool is_write, int64_t *wait)
3573{
3574 int64_t now, max_wait;
3575 uint64_t bps_wait = 0, iops_wait = 0;
3576 double elapsed_time;
3577 int bps_ret, iops_ret;
3578
3579 now = qemu_get_clock_ns(vm_clock);
3580 if ((bs->slice_start < now)
3581 && (bs->slice_end > now)) {
3582 bs->slice_end = now + bs->slice_time;
3583 } else {
3584 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3585 bs->slice_start = now;
3586 bs->slice_end = now + bs->slice_time;
3587
3588 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3589 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3590
3591 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3592 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3593 }
3594
3595 elapsed_time = now - bs->slice_start;
3596 elapsed_time /= (NANOSECONDS_PER_SECOND);
3597
3598 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3599 is_write, elapsed_time, &bps_wait);
3600 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3601 elapsed_time, &iops_wait);
3602 if (bps_ret || iops_ret) {
3603 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3604 if (wait) {
3605 *wait = max_wait;
3606 }
3607
3608 now = qemu_get_clock_ns(vm_clock);
3609 if (bs->slice_end < now + max_wait) {
3610 bs->slice_end = now + max_wait;
3611 }
3612
3613 return true;
3614 }
3615
3616 if (wait) {
3617 *wait = 0;
3618 }
3619
3620 return false;
3621}
pbrookce1a14d2006-08-07 02:38:06 +00003622
bellard83f64092006-08-01 16:21:11 +00003623/**************************************************************/
3624/* async block device emulation */
3625
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003626typedef struct BlockDriverAIOCBSync {
3627 BlockDriverAIOCB common;
3628 QEMUBH *bh;
3629 int ret;
3630 /* vector translation state */
3631 QEMUIOVector *qiov;
3632 uint8_t *bounce;
3633 int is_write;
3634} BlockDriverAIOCBSync;
3635
3636static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3637{
Kevin Wolfb666d232010-05-05 11:44:39 +02003638 BlockDriverAIOCBSync *acb =
3639 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003640 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003641 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003642 qemu_aio_release(acb);
3643}
3644
3645static AIOPool bdrv_em_aio_pool = {
3646 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3647 .cancel = bdrv_aio_cancel_em,
3648};
3649
bellard83f64092006-08-01 16:21:11 +00003650static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003651{
pbrookce1a14d2006-08-07 02:38:06 +00003652 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003653
aliguorif141eaf2009-04-07 18:43:24 +00003654 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04003655 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003656 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003657 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003658 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003659 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003660 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003661}
bellardbeac80c2006-06-26 20:08:57 +00003662
aliguorif141eaf2009-04-07 18:43:24 +00003663static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3664 int64_t sector_num,
3665 QEMUIOVector *qiov,
3666 int nb_sectors,
3667 BlockDriverCompletionFunc *cb,
3668 void *opaque,
3669 int is_write)
3670
bellardea2384d2004-08-01 21:59:26 +00003671{
pbrookce1a14d2006-08-07 02:38:06 +00003672 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003673
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003674 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003675 acb->is_write = is_write;
3676 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003677 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003678 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003679
3680 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04003681 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003682 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003683 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003684 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003685 }
3686
pbrookce1a14d2006-08-07 02:38:06 +00003687 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003688
pbrookce1a14d2006-08-07 02:38:06 +00003689 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003690}
3691
aliguorif141eaf2009-04-07 18:43:24 +00003692static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3693 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003694 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003695{
aliguorif141eaf2009-04-07 18:43:24 +00003696 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003697}
3698
aliguorif141eaf2009-04-07 18:43:24 +00003699static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3700 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3701 BlockDriverCompletionFunc *cb, void *opaque)
3702{
3703 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3704}
3705
Kevin Wolf68485422011-06-30 10:05:46 +02003706
3707typedef struct BlockDriverAIOCBCoroutine {
3708 BlockDriverAIOCB common;
3709 BlockRequest req;
3710 bool is_write;
3711 QEMUBH* bh;
3712} BlockDriverAIOCBCoroutine;
3713
3714static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3715{
3716 qemu_aio_flush();
3717}
3718
3719static AIOPool bdrv_em_co_aio_pool = {
3720 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3721 .cancel = bdrv_aio_co_cancel_em,
3722};
3723
Paolo Bonzini35246a62011-10-14 10:41:29 +02003724static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003725{
3726 BlockDriverAIOCBCoroutine *acb = opaque;
3727
3728 acb->common.cb(acb->common.opaque, acb->req.error);
3729 qemu_bh_delete(acb->bh);
3730 qemu_aio_release(acb);
3731}
3732
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003733/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3734static void coroutine_fn bdrv_co_do_rw(void *opaque)
3735{
3736 BlockDriverAIOCBCoroutine *acb = opaque;
3737 BlockDriverState *bs = acb->common.bs;
3738
3739 if (!acb->is_write) {
3740 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003741 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003742 } else {
3743 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003744 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003745 }
3746
Paolo Bonzini35246a62011-10-14 10:41:29 +02003747 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003748 qemu_bh_schedule(acb->bh);
3749}
3750
Kevin Wolf68485422011-06-30 10:05:46 +02003751static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3752 int64_t sector_num,
3753 QEMUIOVector *qiov,
3754 int nb_sectors,
3755 BlockDriverCompletionFunc *cb,
3756 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003757 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003758{
3759 Coroutine *co;
3760 BlockDriverAIOCBCoroutine *acb;
3761
3762 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3763 acb->req.sector = sector_num;
3764 acb->req.nb_sectors = nb_sectors;
3765 acb->req.qiov = qiov;
3766 acb->is_write = is_write;
3767
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003768 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003769 qemu_coroutine_enter(co, acb);
3770
3771 return &acb->common;
3772}
3773
Paolo Bonzini07f07612011-10-17 12:32:12 +02003774static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003775{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003776 BlockDriverAIOCBCoroutine *acb = opaque;
3777 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003778
Paolo Bonzini07f07612011-10-17 12:32:12 +02003779 acb->req.error = bdrv_co_flush(bs);
3780 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003781 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003782}
3783
Paolo Bonzini07f07612011-10-17 12:32:12 +02003784BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003785 BlockDriverCompletionFunc *cb, void *opaque)
3786{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003787 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003788
Paolo Bonzini07f07612011-10-17 12:32:12 +02003789 Coroutine *co;
3790 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003791
Paolo Bonzini07f07612011-10-17 12:32:12 +02003792 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3793 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3794 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003795
Alexander Graf016f5cf2010-05-26 17:51:49 +02003796 return &acb->common;
3797}
3798
Paolo Bonzini4265d622011-10-17 12:32:14 +02003799static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3800{
3801 BlockDriverAIOCBCoroutine *acb = opaque;
3802 BlockDriverState *bs = acb->common.bs;
3803
3804 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3805 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3806 qemu_bh_schedule(acb->bh);
3807}
3808
3809BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3810 int64_t sector_num, int nb_sectors,
3811 BlockDriverCompletionFunc *cb, void *opaque)
3812{
3813 Coroutine *co;
3814 BlockDriverAIOCBCoroutine *acb;
3815
3816 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3817
3818 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3819 acb->req.sector = sector_num;
3820 acb->req.nb_sectors = nb_sectors;
3821 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3822 qemu_coroutine_enter(co, acb);
3823
3824 return &acb->common;
3825}
3826
bellardea2384d2004-08-01 21:59:26 +00003827void bdrv_init(void)
3828{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003829 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003830}
pbrookce1a14d2006-08-07 02:38:06 +00003831
Markus Armbrustereb852012009-10-27 18:41:44 +01003832void bdrv_init_with_whitelist(void)
3833{
3834 use_bdrv_whitelist = 1;
3835 bdrv_init();
3836}
3837
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003838void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3839 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003840{
pbrookce1a14d2006-08-07 02:38:06 +00003841 BlockDriverAIOCB *acb;
3842
aliguori6bbff9a2009-03-20 18:25:59 +00003843 if (pool->free_aiocb) {
3844 acb = pool->free_aiocb;
3845 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003846 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003847 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003848 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003849 }
3850 acb->bs = bs;
3851 acb->cb = cb;
3852 acb->opaque = opaque;
3853 return acb;
3854}
3855
3856void qemu_aio_release(void *p)
3857{
aliguori6bbff9a2009-03-20 18:25:59 +00003858 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3859 AIOPool *pool = acb->pool;
3860 acb->next = pool->free_aiocb;
3861 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003862}
bellard19cb3732006-08-19 11:45:59 +00003863
3864/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003865/* Coroutine block device emulation */
3866
3867typedef struct CoroutineIOCompletion {
3868 Coroutine *coroutine;
3869 int ret;
3870} CoroutineIOCompletion;
3871
3872static void bdrv_co_io_em_complete(void *opaque, int ret)
3873{
3874 CoroutineIOCompletion *co = opaque;
3875
3876 co->ret = ret;
3877 qemu_coroutine_enter(co->coroutine, NULL);
3878}
3879
3880static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3881 int nb_sectors, QEMUIOVector *iov,
3882 bool is_write)
3883{
3884 CoroutineIOCompletion co = {
3885 .coroutine = qemu_coroutine_self(),
3886 };
3887 BlockDriverAIOCB *acb;
3888
3889 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003890 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3891 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003892 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003893 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3894 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003895 }
3896
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003897 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003898 if (!acb) {
3899 return -EIO;
3900 }
3901 qemu_coroutine_yield();
3902
3903 return co.ret;
3904}
3905
3906static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3907 int64_t sector_num, int nb_sectors,
3908 QEMUIOVector *iov)
3909{
3910 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3911}
3912
3913static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3914 int64_t sector_num, int nb_sectors,
3915 QEMUIOVector *iov)
3916{
3917 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3918}
3919
Paolo Bonzini07f07612011-10-17 12:32:12 +02003920static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003921{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003922 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003923
Paolo Bonzini07f07612011-10-17 12:32:12 +02003924 rwco->ret = bdrv_co_flush(rwco->bs);
3925}
3926
3927int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3928{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003929 int ret;
3930
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003931 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003932 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003933 }
3934
Kevin Wolfca716362011-11-10 18:13:59 +01003935 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003936 if (bs->drv->bdrv_co_flush_to_os) {
3937 ret = bs->drv->bdrv_co_flush_to_os(bs);
3938 if (ret < 0) {
3939 return ret;
3940 }
3941 }
3942
Kevin Wolfca716362011-11-10 18:13:59 +01003943 /* But don't actually force it to the disk with cache=unsafe */
3944 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02003945 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01003946 }
3947
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003948 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003949 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003950 } else if (bs->drv->bdrv_aio_flush) {
3951 BlockDriverAIOCB *acb;
3952 CoroutineIOCompletion co = {
3953 .coroutine = qemu_coroutine_self(),
3954 };
3955
3956 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3957 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003958 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003959 } else {
3960 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003961 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003962 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003963 } else {
3964 /*
3965 * Some block drivers always operate in either writethrough or unsafe
3966 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3967 * know how the server works (because the behaviour is hardcoded or
3968 * depends on server-side configuration), so we can't ensure that
3969 * everything is safe on disk. Returning an error doesn't work because
3970 * that would break guests even if the server operates in writethrough
3971 * mode.
3972 *
3973 * Let's hope the user knows what he's doing.
3974 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003975 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003976 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003977 if (ret < 0) {
3978 return ret;
3979 }
3980
3981 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3982 * in the case of cache=unsafe, so there are no useless flushes.
3983 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02003984flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003985 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003986}
3987
Anthony Liguori0f154232011-11-14 15:09:45 -06003988void bdrv_invalidate_cache(BlockDriverState *bs)
3989{
3990 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3991 bs->drv->bdrv_invalidate_cache(bs);
3992 }
3993}
3994
3995void bdrv_invalidate_cache_all(void)
3996{
3997 BlockDriverState *bs;
3998
3999 QTAILQ_FOREACH(bs, &bdrv_states, list) {
4000 bdrv_invalidate_cache(bs);
4001 }
4002}
4003
Benoît Canet07789262012-03-23 08:36:49 +01004004void bdrv_clear_incoming_migration_all(void)
4005{
4006 BlockDriverState *bs;
4007
4008 QTAILQ_FOREACH(bs, &bdrv_states, list) {
4009 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4010 }
4011}
4012
Paolo Bonzini07f07612011-10-17 12:32:12 +02004013int bdrv_flush(BlockDriverState *bs)
4014{
4015 Coroutine *co;
4016 RwCo rwco = {
4017 .bs = bs,
4018 .ret = NOT_DONE,
4019 };
4020
4021 if (qemu_in_coroutine()) {
4022 /* Fast-path if already in coroutine context */
4023 bdrv_flush_co_entry(&rwco);
4024 } else {
4025 co = qemu_coroutine_create(bdrv_flush_co_entry);
4026 qemu_coroutine_enter(co, &rwco);
4027 while (rwco.ret == NOT_DONE) {
4028 qemu_aio_wait();
4029 }
4030 }
4031
4032 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004033}
4034
Paolo Bonzini4265d622011-10-17 12:32:14 +02004035static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4036{
4037 RwCo *rwco = opaque;
4038
4039 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4040}
4041
4042int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4043 int nb_sectors)
4044{
4045 if (!bs->drv) {
4046 return -ENOMEDIUM;
4047 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4048 return -EIO;
4049 } else if (bs->read_only) {
4050 return -EROFS;
4051 } else if (bs->drv->bdrv_co_discard) {
4052 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
4053 } else if (bs->drv->bdrv_aio_discard) {
4054 BlockDriverAIOCB *acb;
4055 CoroutineIOCompletion co = {
4056 .coroutine = qemu_coroutine_self(),
4057 };
4058
4059 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4060 bdrv_co_io_em_complete, &co);
4061 if (acb == NULL) {
4062 return -EIO;
4063 } else {
4064 qemu_coroutine_yield();
4065 return co.ret;
4066 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02004067 } else {
4068 return 0;
4069 }
4070}
4071
4072int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4073{
4074 Coroutine *co;
4075 RwCo rwco = {
4076 .bs = bs,
4077 .sector_num = sector_num,
4078 .nb_sectors = nb_sectors,
4079 .ret = NOT_DONE,
4080 };
4081
4082 if (qemu_in_coroutine()) {
4083 /* Fast-path if already in coroutine context */
4084 bdrv_discard_co_entry(&rwco);
4085 } else {
4086 co = qemu_coroutine_create(bdrv_discard_co_entry);
4087 qemu_coroutine_enter(co, &rwco);
4088 while (rwco.ret == NOT_DONE) {
4089 qemu_aio_wait();
4090 }
4091 }
4092
4093 return rwco.ret;
4094}
4095
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004096/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00004097/* removable device support */
4098
4099/**
4100 * Return TRUE if the media is present
4101 */
4102int bdrv_is_inserted(BlockDriverState *bs)
4103{
4104 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004105
bellard19cb3732006-08-19 11:45:59 +00004106 if (!drv)
4107 return 0;
4108 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004109 return 1;
4110 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004111}
4112
4113/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004114 * Return whether the media changed since the last call to this
4115 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004116 */
4117int bdrv_media_changed(BlockDriverState *bs)
4118{
4119 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004120
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004121 if (drv && drv->bdrv_media_changed) {
4122 return drv->bdrv_media_changed(bs);
4123 }
4124 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00004125}
4126
4127/**
4128 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4129 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02004130void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00004131{
4132 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004133
Markus Armbruster822e1cd2011-07-20 18:23:42 +02004134 if (drv && drv->bdrv_eject) {
4135 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00004136 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02004137
4138 if (bs->device_name[0] != '\0') {
4139 bdrv_emit_qmp_eject_event(bs, eject_flag);
4140 }
bellard19cb3732006-08-19 11:45:59 +00004141}
4142
bellard19cb3732006-08-19 11:45:59 +00004143/**
4144 * Lock or unlock the media (if it is locked, the user won't be able
4145 * to eject it manually).
4146 */
Markus Armbruster025e8492011-09-06 18:58:47 +02004147void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00004148{
4149 BlockDriver *drv = bs->drv;
4150
Markus Armbruster025e8492011-09-06 18:58:47 +02004151 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01004152
Markus Armbruster025e8492011-09-06 18:58:47 +02004153 if (drv && drv->bdrv_lock_medium) {
4154 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00004155 }
4156}
ths985a03b2007-12-24 16:10:43 +00004157
4158/* needed for generic scsi interface */
4159
4160int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
4161{
4162 BlockDriver *drv = bs->drv;
4163
4164 if (drv && drv->bdrv_ioctl)
4165 return drv->bdrv_ioctl(bs, req, buf);
4166 return -ENOTSUP;
4167}
aliguori7d780662009-03-12 19:57:08 +00004168
aliguori221f7152009-03-28 17:28:41 +00004169BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
4170 unsigned long int req, void *buf,
4171 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00004172{
aliguori221f7152009-03-28 17:28:41 +00004173 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00004174
aliguori221f7152009-03-28 17:28:41 +00004175 if (drv && drv->bdrv_aio_ioctl)
4176 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
4177 return NULL;
aliguori7d780662009-03-12 19:57:08 +00004178}
aliguorie268ca52009-04-22 20:20:00 +00004179
Markus Armbruster7b6f9302011-09-06 18:58:56 +02004180void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
4181{
4182 bs->buffer_alignment = align;
4183}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004184
aliguorie268ca52009-04-22 20:20:00 +00004185void *qemu_blockalign(BlockDriverState *bs, size_t size)
4186{
4187 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
4188}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004189
4190void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
4191{
4192 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004193
Liran Schouraaa0eb72010-01-26 10:31:48 +02004194 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004195 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01004196 if (!bs->dirty_bitmap) {
4197 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
Paolo Bonzini71df14f2012-04-12 14:01:04 +02004198 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
4199 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004200
Paolo Bonzini71df14f2012-04-12 14:01:04 +02004201 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004202 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004203 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01004204 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05004205 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01004206 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004207 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004208 }
4209}
4210
4211int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
4212{
Jan Kiszka6ea44302009-11-30 18:21:19 +01004213 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004214
Jan Kiszkac6d22832009-11-30 18:21:20 +01004215 if (bs->dirty_bitmap &&
4216 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02004217 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
4218 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004219 } else {
4220 return 0;
4221 }
4222}
4223
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004224void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
4225 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004226{
4227 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
4228}
Liran Schouraaa0eb72010-01-26 10:31:48 +02004229
4230int64_t bdrv_get_dirty_count(BlockDriverState *bs)
4231{
4232 return bs->dirty_count;
4233}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004234
Marcelo Tosattidb593f22011-01-26 12:12:34 -02004235void bdrv_set_in_use(BlockDriverState *bs, int in_use)
4236{
4237 assert(bs->in_use != in_use);
4238 bs->in_use = in_use;
4239}
4240
4241int bdrv_in_use(BlockDriverState *bs)
4242{
4243 return bs->in_use;
4244}
4245
Luiz Capitulino28a72822011-09-26 17:43:50 -03004246void bdrv_iostatus_enable(BlockDriverState *bs)
4247{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004248 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004249 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004250}
4251
4252/* The I/O status is only enabled if the drive explicitly
4253 * enables it _and_ the VM is configured to stop on errors */
4254bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
4255{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004256 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02004257 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
4258 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
4259 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03004260}
4261
4262void bdrv_iostatus_disable(BlockDriverState *bs)
4263{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004264 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004265}
4266
4267void bdrv_iostatus_reset(BlockDriverState *bs)
4268{
4269 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004270 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004271 }
4272}
4273
Luiz Capitulino28a72822011-09-26 17:43:50 -03004274void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
4275{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02004276 assert(bdrv_iostatus_is_enabled(bs));
4277 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004278 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
4279 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004280 }
4281}
4282
Christoph Hellwiga597e792011-08-25 08:26:01 +02004283void
4284bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4285 enum BlockAcctType type)
4286{
4287 assert(type < BDRV_MAX_IOTYPE);
4288
4289 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004290 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02004291 cookie->type = type;
4292}
4293
4294void
4295bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4296{
4297 assert(cookie->type < BDRV_MAX_IOTYPE);
4298
4299 bs->nr_bytes[cookie->type] += cookie->bytes;
4300 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004301 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02004302}
4303
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004304int bdrv_img_create(const char *filename, const char *fmt,
4305 const char *base_filename, const char *base_fmt,
4306 char *options, uint64_t img_size, int flags)
4307{
4308 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02004309 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004310 BlockDriverState *bs = NULL;
4311 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004312 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004313 int ret = 0;
4314
4315 /* Find driver and parse its options */
4316 drv = bdrv_find_format(fmt);
4317 if (!drv) {
4318 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004319 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004320 goto out;
4321 }
4322
4323 proto_drv = bdrv_find_protocol(filename);
4324 if (!proto_drv) {
4325 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004326 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004327 goto out;
4328 }
4329
4330 create_options = append_option_parameters(create_options,
4331 drv->create_options);
4332 create_options = append_option_parameters(create_options,
4333 proto_drv->create_options);
4334
4335 /* Create parameter list with default values */
4336 param = parse_option_parameters("", create_options, param);
4337
4338 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4339
4340 /* Parse -o options */
4341 if (options) {
4342 param = parse_option_parameters(options, create_options, param);
4343 if (param == NULL) {
4344 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004345 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004346 goto out;
4347 }
4348 }
4349
4350 if (base_filename) {
4351 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4352 base_filename)) {
4353 error_report("Backing file not supported for file format '%s'",
4354 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004355 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004356 goto out;
4357 }
4358 }
4359
4360 if (base_fmt) {
4361 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4362 error_report("Backing file format not supported for file "
4363 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004364 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004365 goto out;
4366 }
4367 }
4368
Jes Sorensen792da932010-12-16 13:52:17 +01004369 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4370 if (backing_file && backing_file->value.s) {
4371 if (!strcmp(filename, backing_file->value.s)) {
4372 error_report("Error: Trying to create an image with the "
4373 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004374 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004375 goto out;
4376 }
4377 }
4378
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004379 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4380 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004381 backing_drv = bdrv_find_format(backing_fmt->value.s);
4382 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004383 error_report("Unknown backing file format '%s'",
4384 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004385 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004386 goto out;
4387 }
4388 }
4389
4390 // The size for the image must always be specified, with one exception:
4391 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004392 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4393 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004394 if (backing_file && backing_file->value.s) {
4395 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004396 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02004397 int back_flags;
4398
4399 /* backing files always opened read-only */
4400 back_flags =
4401 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004402
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004403 bs = bdrv_new("");
4404
Paolo Bonzini63090da2012-04-12 14:01:03 +02004405 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004406 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004407 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004408 goto out;
4409 }
4410 bdrv_get_geometry(bs, &size);
4411 size *= 512;
4412
4413 snprintf(buf, sizeof(buf), "%" PRId64, size);
4414 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4415 } else {
4416 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004417 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004418 goto out;
4419 }
4420 }
4421
4422 printf("Formatting '%s', fmt=%s ", filename, fmt);
4423 print_option_parameters(param);
4424 puts("");
4425
4426 ret = bdrv_create(drv, filename, param);
4427
4428 if (ret < 0) {
4429 if (ret == -ENOTSUP) {
4430 error_report("Formatting or formatting option not supported for "
4431 "file format '%s'", fmt);
4432 } else if (ret == -EFBIG) {
4433 error_report("The image size is too large for file format '%s'",
4434 fmt);
4435 } else {
4436 error_report("%s: error while creating %s: %s", filename, fmt,
4437 strerror(-ret));
4438 }
4439 }
4440
4441out:
4442 free_option_parameters(create_options);
4443 free_option_parameters(param);
4444
4445 if (bs) {
4446 bdrv_delete(bs);
4447 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004448
4449 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004450}