/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor/monitor.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

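/* Disable I/O throttling on a device: drain any queued throttled requests
 * and tear down the throttle state. */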
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

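/* Throttle timer callbacks: when a throttle timer fires, allow the next
 * queued read (index 0) or write (index 1) request to run. */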
static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @bytes:    the number of bytes of the IO
 * @is_write: is the IO a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already queued, queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

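/* Return the memory alignment required for data buffers used with bs;
 * 4096 is used as a conservative default when no driver is attached. */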
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

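/* Return 1 if the path is absolute (including Windows drive-letter and UNC
 * forms), 0 otherwise. */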
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

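/* Resolve bs->backing_file into dest: an empty name or one carrying a
 * protocol prefix is copied verbatim, otherwise path_combine() resolves it
 * relative to the image's own filename. */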
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

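/* Register a block driver with the block layer, filling in coroutine and
 * AIO emulation for the callbacks the driver does not provide itself. */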
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

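/* Look up a registered block driver by format name; returns NULL if none
 * matches. */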
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

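/* Check a driver against the configure-time whitelists.  Returns 1 if the
 * driver may be used (an empty whitelist allows everything); for read-only
 * use the read-only whitelist is consulted as well. */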
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
    Error *err;
} CreateCo;

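/* Coroutine entry point for image creation: call the driver's create
 * callback and store the result (and any error) in the CreateCo. */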
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

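/* Create an image with the given driver and creation options.  The driver's
 * create callback runs in a coroutine; when not already in coroutine
 * context, one is spawned and waited for. */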
int bdrv_create(BlockDriver *drv, const char* filename,
                QEMUOptionParameter *options, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

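/* Like bdrv_create(), but picks the driver from the protocol prefix of the
 * filename (falling back to the 'file' driver). */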
int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
                     Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

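/* Recompute the BlockLimits of bs: start from the limits of bs->file and
 * the backing file (when present), then let the driver override them. */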
int bdrv_refresh_limits(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return 0;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file);
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd);
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        return drv->bdrv_refresh_limits(bs);
    }

    return 0;
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

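/* Find the protocol driver for a filename.  Host device probing takes
 * precedence; without a protocol prefix (or when prefixes are not allowed)
 * the 'file' driver is returned. */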
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

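/* Guess the image format: scsi-generic devices and empty drives get the
 * 'raw' driver; otherwise the first 2048 bytes are probed with every
 * registered driver and the highest-scoring one wins. */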
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

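/* Compute the flags that are actually handed to the driver when opening an
 * image: block-layer internal flags are stripped and temporary snapshots
 * are forced writable. */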
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

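/* Validate the optional node-name and, if it is acceptable and not already
 * in use, copy it into the BDS and insert the BDS into the graph node list. */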
static int bdrv_assign_node_name(BlockDriverState *bs,
                                 const char *node_name,
                                 Error **errp)
{
    if (!node_name) {
        return 0;
    }

    /* empty string node name is invalid */
    if (node_name[0] == '\0') {
        error_setg(errp, "Empty node name");
        return -EINVAL;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return -EINVAL;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return -EINVAL;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);

    return 0;
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    ret = bdrv_assign_node_name(bs, node_name, errp);
    if (ret < 0) {
        return ret;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs);
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bs->request_alignment != 0);

#ifndef _WIN32
    if (bs->is_temporary) {
        assert(bs->filename[0] != '\0');
        unlink(bs->filename);
    }
#endif
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
 */
static int bdrv_file_open(BlockDriverState *bs, const char *filename,
                          QDict *options, int flags, Error **errp)
{
    BlockDriver *drv;
    const char *drvname;
    bool allow_protocol_prefix = false;
    Error *local_err = NULL;
    int ret;

    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
        allow_protocol_prefix = true;
    } else {
        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
                   "same time");
        ret = -EINVAL;
        goto fail;
    }

    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
        }
        qdict_del(options, "driver");
    } else if (filename) {
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
            error_setg(errp, "Unknown protocol");
        }
    } else {
        error_setg(errp, "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        /* errp has been set already */
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            ret = -EINVAL;
            goto fail;
        }
        qdict_del(options, "filename");
    }

    if (!drv->bdrv_file_open) {
        ret = bdrv_open(&bs, filename, NULL, options, flags, drv, &local_err);
        options = NULL;
    } else {
        ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
    }
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
                   drv->format_name, entry->key);
        ret = -EINVAL;
        goto fail;
    }
    QDECREF(options);

    bs->growable = 1;
    return 0;

fail:
    QDECREF(options);
    return ret;
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
    }

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
                                    BDRV_O_COPY_ON_READ);

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    back_flags, back_drv, &local_err);
    if (ret < 0) {
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        return ret;
    }

    if (bs->backing_hd->file) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
                bs->backing_hd->file->filename);
    }

    /* Recalculate the BlockLimits with the backing file */
    bdrv_refresh_limits(bs);

    return 0;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If force_raw is true, bdrv_file_open() will be used, thereby preventing any
 * image format auto-detection. If it is false and a filename is given,
 * bdrv_open() will be used for auto-detection.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool force_raw, bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        goto done;
    }

    if (filename && !force_raw) {
        /* If a filename is given and the block driver should be detected
           automatically (instead of using none), use bdrv_open() in order to do
           that auto-detection. */
        if (reference) {
            error_setg(errp, "Cannot reference an existing block device while "
                       "giving a filename");
            ret = -EINVAL;
            goto done;
        }

        ret = bdrv_open(pbs, filename, NULL, image_options, flags, NULL, errp);
    } else {
        ret = bdrv_open(pbs, filename, reference, image_options,
                        flags | BDRV_O_PROTOCOL, NULL, errp);
    }

done:
    qdict_del(options, bdref_key);
    return ret;
}

1187/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001188 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001189 *
1190 * options is a QDict of options to pass to the block drivers, or NULL for an
1191 * empty set of options. The reference to the QDict belongs to the block layer
1192 * after the call (even on failure), so if the caller intends to reuse the
1193 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001194 *
1195 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1196 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001197 *
1198 * The reference parameter may be used to specify an existing block device which
1199 * should be opened. If specified, neither options nor a filename may be given,
1200 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001201 */
Max Reitzddf56362014-02-18 18:33:06 +01001202int bdrv_open(BlockDriverState **pbs, const char *filename,
1203 const char *reference, QDict *options, int flags,
1204 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001205{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001206 int ret;
Stefan Weil89c9bc32012-11-22 07:25:48 +01001207 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1208 char tmp_filename[PATH_MAX + 1];
Max Reitzf67503e2014-02-18 18:33:05 +01001209 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001210 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001211 Error *local_err = NULL;
bellard712e7872005-04-28 21:09:32 +00001212
Max Reitzf67503e2014-02-18 18:33:05 +01001213 assert(pbs);
1214
Max Reitzddf56362014-02-18 18:33:06 +01001215 if (reference) {
1216 bool options_non_empty = options ? qdict_size(options) : false;
1217 QDECREF(options);
1218
1219 if (*pbs) {
1220 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1221 "another block device");
1222 return -EINVAL;
1223 }
1224
1225 if (filename || options_non_empty) {
1226 error_setg(errp, "Cannot reference an existing block device with "
1227 "additional options or a new filename");
1228 return -EINVAL;
1229 }
1230
1231 bs = bdrv_lookup_bs(reference, reference, errp);
1232 if (!bs) {
1233 return -ENODEV;
1234 }
1235 bdrv_ref(bs);
1236 *pbs = bs;
1237 return 0;
1238 }
1239
Max Reitzf67503e2014-02-18 18:33:05 +01001240 if (*pbs) {
1241 bs = *pbs;
1242 } else {
1243 bs = bdrv_new("");
1244 }
1245
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001246 /* NULL means an empty set of options */
1247 if (options == NULL) {
1248 options = qdict_new();
1249 }
1250
1251 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001252 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001253
Max Reitz5469a2a2014-02-18 18:33:10 +01001254 if (flags & BDRV_O_PROTOCOL) {
1255 assert(!drv);
1256 ret = bdrv_file_open(bs, filename, options, flags & ~BDRV_O_PROTOCOL,
1257 &local_err);
1258 options = NULL;
1259 if (!ret) {
1260 *pbs = bs;
1261 return 0;
1262 } else if (bs->drv) {
1263 goto close_and_fail;
1264 } else {
1265 goto fail;
1266 }
1267 }
1268
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001269 /* For snapshot=on, create a temporary qcow2 overlay */
bellard83f64092006-08-01 16:21:11 +00001270 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +00001271 BlockDriverState *bs1;
1272 int64_t total_size;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001273 BlockDriver *bdrv_qcow2;
Kevin Wolf08b392e2013-03-18 16:17:44 +01001274 QEMUOptionParameter *create_options;
Kevin Wolf9fd31712013-11-14 15:37:12 +01001275 QDict *snapshot_options;
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001276
bellardea2384d2004-08-01 21:59:26 +00001277 /* if snapshot, we create a temporary backing file and open it
1278 instead of opening 'filename' directly */
1279
Kevin Wolf9fd31712013-11-14 15:37:12 +01001280 /* Get the required size from the image */
Kevin Wolf9fd31712013-11-14 15:37:12 +01001281 QINCREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001282 bs1 = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01001283 ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
Kevin Wolfc9fbb992013-11-28 11:58:02 +01001284 drv, &local_err);
aliguori51d7c002009-03-05 23:00:29 +00001285 if (ret < 0) {
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001286 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001287 }
Jes Sorensen3e829902010-05-27 16:20:30 +02001288 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +00001289
Fam Zheng4f6fd342013-08-23 09:14:47 +08001290 bdrv_unref(bs1);
ths3b46e622007-09-17 08:09:54 +00001291
Kevin Wolf9fd31712013-11-14 15:37:12 +01001292 /* Create the temporary image */
Jim Meyeringeba25052012-05-28 09:27:54 +02001293 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1294 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001295 error_setg_errno(errp, -ret, "Could not get temporary filename");
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001296 goto fail;
Jim Meyeringeba25052012-05-28 09:27:54 +02001297 }
aliguori7c96d462008-09-12 17:54:13 +00001298
Kevin Wolf91a073a2009-05-27 14:48:06 +02001299 bdrv_qcow2 = bdrv_find_format("qcow2");
Kevin Wolf08b392e2013-03-18 16:17:44 +01001300 create_options = parse_option_parameters("", bdrv_qcow2->create_options,
1301 NULL);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001302
Kevin Wolf08b392e2013-03-18 16:17:44 +01001303 set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001304
Max Reitzcc84d902013-09-06 17:14:26 +02001305 ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
Kevin Wolf08b392e2013-03-18 16:17:44 +01001306 free_option_parameters(create_options);
aliguori51d7c002009-03-05 23:00:29 +00001307 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001308 error_setg_errno(errp, -ret, "Could not create temporary overlay "
Max Reitzcc84d902013-09-06 17:14:26 +02001309 "'%s': %s", tmp_filename,
1310 error_get_pretty(local_err));
1311 error_free(local_err);
1312 local_err = NULL;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001313 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001314 }
Kevin Wolf91a073a2009-05-27 14:48:06 +02001315
Kevin Wolf9fd31712013-11-14 15:37:12 +01001316 /* Prepare a new options QDict for the temporary file, where user
1317 * options refer to the backing file */
1318 if (filename) {
1319 qdict_put(options, "file.filename", qstring_from_str(filename));
1320 }
1321 if (drv) {
1322 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1323 }
1324
1325 snapshot_options = qdict_new();
1326 qdict_put(snapshot_options, "backing", options);
1327 qdict_flatten(snapshot_options);
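        /* Editor's note (not in the original source): qdict_flatten() turns the
         * nested "backing" dict into dotted keys, so snapshot_options now holds
         * e.g. "backing.file.filename" -> filename and "backing.driver" ->
         * drv->format_name, plus any user options prefixed with "backing.".
         * These become the options of the temporary overlay, whose backing
         * file is the original image. */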
1328
1329 bs->options = snapshot_options;
1330 options = qdict_clone_shallow(bs->options);
1331
bellardea2384d2004-08-01 21:59:26 +00001332 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001333 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +00001334 bs->is_temporary = 1;
1335 }
bellard712e7872005-04-28 21:09:32 +00001336
Kevin Wolff500a6d2012-11-12 17:35:27 +01001337 /* Open image file without format layer */
Jeff Codybe028ad2012-09-20 15:13:17 -04001338 if (flags & BDRV_O_RDWR) {
1339 flags |= BDRV_O_ALLOW_RDWR;
1340 }
1341
Max Reitzf67503e2014-02-18 18:33:05 +01001342 assert(file == NULL);
Max Reitz054963f2013-12-20 19:28:12 +01001343 ret = bdrv_open_image(&file, filename, options, "file",
1344 bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true,
1345 &local_err);
1346 if (ret < 0) {
1347 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001348 }
1349
1350 /* Find the right image format driver */
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001351 drvname = qdict_get_try_str(options, "driver");
1352 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +02001353 drv = bdrv_find_format(drvname);
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001354 qdict_del(options, "driver");
Kevin Wolf06d22aa2013-08-08 17:44:52 +02001355 if (!drv) {
1356 error_setg(errp, "Invalid driver: '%s'", drvname);
1357 ret = -EINVAL;
1358 goto unlink_and_fail;
1359 }
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001360 }
1361
Kevin Wolff500a6d2012-11-12 17:35:27 +01001362 if (!drv) {
Max Reitz2a05cbe2013-12-20 19:28:10 +01001363 if (file) {
1364 ret = find_image_format(file, filename, &drv, &local_err);
1365 } else {
1366 error_setg(errp, "Must specify either driver or file");
1367 ret = -EINVAL;
1368 goto unlink_and_fail;
1369 }
Kevin Wolff500a6d2012-11-12 17:35:27 +01001370 }
1371
1372 if (!drv) {
1373 goto unlink_and_fail;
1374 }
1375
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001376 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001377 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001378 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +01001379 goto unlink_and_fail;
1380 }
1381
Max Reitz2a05cbe2013-12-20 19:28:10 +01001382 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001383 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001384 file = NULL;
1385 }
1386
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001387 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001388 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001389 QDict *backing_options;
1390
Benoît Canet5726d872013-09-25 13:30:01 +02001391 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001392 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001393 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001394 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001395 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001396 }
1397
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001398 /* Check if any unknown options were used */
1399 if (qdict_size(options) != 0) {
1400 const QDictEntry *entry = qdict_first(options);
Max Reitz34b5d2c2013-09-05 14:45:29 +02001401 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1402 "support the option '%s'", drv->format_name, bs->device_name,
1403 entry->key);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001404
1405 ret = -EINVAL;
1406 goto close_and_fail;
1407 }
1408 QDECREF(options);
1409
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001410 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001411 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001412 }
1413
Max Reitzf67503e2014-02-18 18:33:05 +01001414 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001415 return 0;
1416
1417unlink_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001418 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001419 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001420 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001421 if (bs->is_temporary) {
1422 unlink(filename);
1423 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001424fail:
1425 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001426 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001427 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001428 if (!*pbs) {
1429 /* If *pbs is NULL, a new BDS has been created in this function and
1430 needs to be freed now. Otherwise, it does not need to be closed,
1431 since it has not really been opened yet. */
1432 bdrv_unref(bs);
1433 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001434 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001435 error_propagate(errp, local_err);
1436 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001437 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001438
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001439close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001440 /* See fail path, but now the BDS has to be always closed */
1441 if (*pbs) {
1442 bdrv_close(bs);
1443 } else {
1444 bdrv_unref(bs);
1445 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001446 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001447 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001448 error_propagate(errp, local_err);
1449 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001450 return ret;
1451}
1452
Jeff Codye971aa12012-09-20 15:13:19 -04001453typedef struct BlockReopenQueueEntry {
1454 bool prepared;
1455 BDRVReopenState state;
1456 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1457} BlockReopenQueueEntry;
1458
1459/*
1460 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1461 * reopen of multiple devices.
1462 *
1463 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT()
1464 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue
1465 * will be created and initialized. This newly created BlockReopenQueue should be
1466 * passed back in for subsequent calls that are intended to be of the same
1467 * atomic 'set'.
1468 *
1469 * bs is the BlockDriverState to add to the reopen queue.
1470 *
1471 * flags contains the open flags for the associated bs
1472 *
1473 * returns a pointer to bs_queue, which is either the newly allocated
1474 * bs_queue, or the existing bs_queue being used.
1475 *
1476 */
1477BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1478 BlockDriverState *bs, int flags)
1479{
1480 assert(bs != NULL);
1481
1482 BlockReopenQueueEntry *bs_entry;
1483 if (bs_queue == NULL) {
1484 bs_queue = g_new0(BlockReopenQueue, 1);
1485 QSIMPLEQ_INIT(bs_queue);
1486 }
1487
1488 if (bs->file) {
1489 bdrv_reopen_queue(bs_queue, bs->file, flags);
1490 }
1491
1492 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1493 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1494
1495 bs_entry->state.bs = bs;
1496 bs_entry->state.flags = flags;
1497
1498 return bs_queue;
1499}
1500
1501/*
1502 * Reopen multiple BlockDriverStates atomically & transactionally.
1503 *
1504 * The queue passed in (bs_queue) must have been built up previously
1505 * via bdrv_reopen_queue().
1506 *
1507 * Reopens all BDS specified in the queue, with the appropriate
1508 * flags. All devices are prepared for reopen, and failure of any
1509 * device will cause all device changes to be abandonded, and intermediate
1510 * device will cause all device changes to be abandoned, and intermediate
1511 *
1512 * If all devices prepare successfully, then the changes are committed
1513 * to all devices.
1514 *
1515 */
1516int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1517{
1518 int ret = -1;
1519 BlockReopenQueueEntry *bs_entry, *next;
1520 Error *local_err = NULL;
1521
1522 assert(bs_queue != NULL);
1523
1524 bdrv_drain_all();
1525
1526 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1527 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1528 error_propagate(errp, local_err);
1529 goto cleanup;
1530 }
1531 bs_entry->prepared = true;
1532 }
1533
1534 /* If we reach this point, we have success and just need to apply the
1535 * changes
1536 */
1537 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1538 bdrv_reopen_commit(&bs_entry->state);
1539 }
1540
1541 ret = 0;
1542
1543cleanup:
1544 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1545 if (ret && bs_entry->prepared) {
1546 bdrv_reopen_abort(&bs_entry->state);
1547 }
1548 g_free(bs_entry);
1549 }
1550 g_free(bs_queue);
1551 return ret;
1552}
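/*
 * Editor's sketch, not part of the original file: a minimal caller that
 * reopens two devices read-only in one atomic transaction. The names bs_a
 * and bs_b and the error handling are illustrative assumptions; the pattern
 * itself mirrors bdrv_reopen() below.
 *
 *     Error *local_err = NULL;
 *     BlockReopenQueue *queue = NULL;
 *
 *     queue = bdrv_reopen_queue(queue, bs_a, bs_a->open_flags & ~BDRV_O_RDWR);
 *     queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags & ~BDRV_O_RDWR);
 *     if (bdrv_reopen_multiple(queue, &local_err) < 0) {
 *         error_report("%s", error_get_pretty(local_err));
 *         error_free(local_err);
 *     }
 *
 * Note that bdrv_reopen_multiple() frees the queue itself (see the
 * g_free(bs_queue) in its cleanup path), so the caller must not reuse it.
 */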
1553
1554
1555/* Reopen a single BlockDriverState with the specified flags. */
1556int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1557{
1558 int ret = -1;
1559 Error *local_err = NULL;
1560 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1561
1562 ret = bdrv_reopen_multiple(queue, &local_err);
1563 if (local_err != NULL) {
1564 error_propagate(errp, local_err);
1565 }
1566 return ret;
1567}
1568
1569
1570/*
1571 * Prepares a BlockDriverState for reopen. All changes are staged in the
1572 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1573 * the block driver's .bdrv_reopen_prepare() callback.
1574 *
1575 * bs is the BlockDriverState to reopen
1576 * flags are the new open flags
1577 * queue is the reopen queue
1578 *
1579 * Returns 0 on success, non-zero on error. On error errp will be set
1580 * as well.
1581 *
1582 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1583 * It is then the responsibility of the caller to call bdrv_reopen_abort() or
1584 * bdrv_reopen_commit() for any other BDS that has been left in the prepare() state.
1585 *
1586 */
1587int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1588 Error **errp)
1589{
1590 int ret = -1;
1591 Error *local_err = NULL;
1592 BlockDriver *drv;
1593
1594 assert(reopen_state != NULL);
1595 assert(reopen_state->bs->drv != NULL);
1596 drv = reopen_state->bs->drv;
1597
1598 /* if we are to stay read-only, do not allow permission change
1599 * to r/w */
1600 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1601 reopen_state->flags & BDRV_O_RDWR) {
1602 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1603 reopen_state->bs->device_name);
1604 goto error;
1605 }
1606
1607
1608 ret = bdrv_flush(reopen_state->bs);
1609 if (ret) {
1610 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1611 strerror(-ret));
1612 goto error;
1613 }
1614
1615 if (drv->bdrv_reopen_prepare) {
1616 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1617 if (ret) {
1618 if (local_err != NULL) {
1619 error_propagate(errp, local_err);
1620 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001621 error_setg(errp, "failed while preparing to reopen image '%s'",
1622 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001623 }
1624 goto error;
1625 }
1626 } else {
1627 /* It is currently mandatory to have a bdrv_reopen_prepare()
1628 * handler for each supported drv. */
1629 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1630 drv->format_name, reopen_state->bs->device_name,
1631 "reopening of file");
1632 ret = -1;
1633 goto error;
1634 }
1635
1636 ret = 0;
1637
1638error:
1639 return ret;
1640}
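/*
 * Editor's sketch of the driver-side contract described above; this is an
 * illustrative, hypothetical driver ("foo") and is not part of the original
 * file. A driver stages its private state in reopen_state->opaque from
 * .bdrv_reopen_prepare() and either applies or discards it later:
 *
 *     static int foo_reopen_prepare(BDRVReopenState *state,
 *                                   BlockReopenQueue *queue, Error **errp)
 *     {
 *         state->opaque = g_malloc0(sizeof(FooReopenState));  // staged state
 *         // validate state->flags, prefill the staged state; set errp and
 *         // return a negative value on failure
 *         return 0;
 *     }
 *
 *     static void foo_reopen_commit(BDRVReopenState *state)
 *     {
 *         // apply the staged state to the BDS, then release it
 *         g_free(state->opaque);
 *     }
 *
 *     static void foo_reopen_abort(BDRVReopenState *state)
 *     {
 *         // drop the staged state without touching the BDS
 *         g_free(state->opaque);
 *     }
 */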
1641
1642/*
1643 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1644 * makes them final by swapping the staging BlockDriverState contents into
1645 * the active BlockDriverState contents.
1646 */
1647void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1648{
1649 BlockDriver *drv;
1650
1651 assert(reopen_state != NULL);
1652 drv = reopen_state->bs->drv;
1653 assert(drv != NULL);
1654
1655 /* If there are any driver level actions to take */
1656 if (drv->bdrv_reopen_commit) {
1657 drv->bdrv_reopen_commit(reopen_state);
1658 }
1659
1660 /* set BDS specific flags now */
1661 reopen_state->bs->open_flags = reopen_state->flags;
1662 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1663 BDRV_O_CACHE_WB);
1664 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001665
1666 bdrv_refresh_limits(reopen_state->bs);
Jeff Codye971aa12012-09-20 15:13:19 -04001667}
1668
1669/*
1670 * Abort the reopen, and delete and free the staged changes in
1671 * reopen_state
1672 */
1673void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1674{
1675 BlockDriver *drv;
1676
1677 assert(reopen_state != NULL);
1678 drv = reopen_state->bs->drv;
1679 assert(drv != NULL);
1680
1681 if (drv->bdrv_reopen_abort) {
1682 drv->bdrv_reopen_abort(reopen_state);
1683 }
1684}
1685
1686
bellardfc01f7e2003-06-30 10:03:06 +00001687void bdrv_close(BlockDriverState *bs)
1688{
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001689 if (bs->job) {
1690 block_job_cancel_sync(bs->job);
1691 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001692 bdrv_drain_all(); /* complete I/O */
1693 bdrv_flush(bs);
1694 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001695 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001696
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001697 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001698 if (bs->backing_hd) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001699 bdrv_unref(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001700 bs->backing_hd = NULL;
1701 }
bellardea2384d2004-08-01 21:59:26 +00001702 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001703 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001704#ifdef _WIN32
1705 if (bs->is_temporary) {
1706 unlink(bs->filename);
1707 }
bellard67b915a2004-03-31 23:37:16 +00001708#endif
bellardea2384d2004-08-01 21:59:26 +00001709 bs->opaque = NULL;
1710 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001711 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001712 bs->backing_file[0] = '\0';
1713 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001714 bs->total_sectors = 0;
1715 bs->encrypted = 0;
1716 bs->valid_key = 0;
1717 bs->sg = 0;
1718 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001719 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001720 QDECREF(bs->options);
1721 bs->options = NULL;
bellardb3380822004-03-14 21:38:54 +00001722
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001723 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001724 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001725 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001726 }
bellardb3380822004-03-14 21:38:54 +00001727 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001728
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001729 bdrv_dev_change_media_cb(bs, false);
1730
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001731 /*throttling disk I/O limits*/
1732 if (bs->io_limits_enabled) {
1733 bdrv_io_limits_disable(bs);
1734 }
bellardb3380822004-03-14 21:38:54 +00001735}
1736
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001737void bdrv_close_all(void)
1738{
1739 BlockDriverState *bs;
1740
Benoît Canetdc364f42014-01-23 21:31:32 +01001741 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001742 bdrv_close(bs);
1743 }
1744}
1745
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001746/* Check if any requests are in-flight (including throttled requests) */
1747static bool bdrv_requests_pending(BlockDriverState *bs)
1748{
1749 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1750 return true;
1751 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001752 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1753 return true;
1754 }
1755 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001756 return true;
1757 }
1758 if (bs->file && bdrv_requests_pending(bs->file)) {
1759 return true;
1760 }
1761 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1762 return true;
1763 }
1764 return false;
1765}
1766
1767static bool bdrv_requests_pending_all(void)
1768{
1769 BlockDriverState *bs;
Benoît Canetdc364f42014-01-23 21:31:32 +01001770 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001771 if (bdrv_requests_pending(bs)) {
1772 return true;
1773 }
1774 }
1775 return false;
1776}
1777
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001778/*
1779 * Wait for pending requests to complete across all BlockDriverStates
1780 *
1781 * This function does not flush data to disk, use bdrv_flush_all() for that
1782 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001783 *
1784 * Note that completion of an asynchronous I/O operation can trigger any
1785 * number of other I/O operations on other devices---for example a coroutine
1786 * can be arbitrarily complex and a constant flow of I/O can come until the
1787 * coroutine is complete. Because of this, it is not possible to have a
1788 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001789 */
1790void bdrv_drain_all(void)
1791{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001792 /* Always run first iteration so any pending completion BHs run */
1793 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001794 BlockDriverState *bs;
1795
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001796 while (busy) {
Benoît Canetdc364f42014-01-23 21:31:32 +01001797 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi0b06ef32013-11-26 16:18:00 +01001798 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001799 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001800
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001801 busy = bdrv_requests_pending_all();
1802 busy |= aio_poll(qemu_get_aio_context(), busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001803 }
1804}
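/*
 * Editor's sketch (assumption, not from the original source): the typical
 * pattern before a global state change is to quiesce and then flush, as the
 * comment above already suggests:
 *
 *     bdrv_drain_all();   // wait for in-flight and throttled requests
 *     bdrv_flush_all();   // then push completed writes to stable storage
 */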
1805
Benoît Canetdc364f42014-01-23 21:31:32 +01001806/* make a BlockDriverState anonymous by removing it from the bdrv_states and
 1807 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001808 Also, clear the device_name to prevent a double remove */
1809void bdrv_make_anon(BlockDriverState *bs)
1810{
1811 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001812 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001813 }
1814 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001815 if (bs->node_name[0] != '\0') {
1816 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1817 }
1818 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001819}
1820
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001821static void bdrv_rebind(BlockDriverState *bs)
1822{
1823 if (bs->drv && bs->drv->bdrv_rebind) {
1824 bs->drv->bdrv_rebind(bs);
1825 }
1826}
1827
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001828static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1829 BlockDriverState *bs_src)
1830{
1831 /* move some fields that need to stay attached to the device */
1832 bs_dest->open_flags = bs_src->open_flags;
1833
1834 /* dev info */
1835 bs_dest->dev_ops = bs_src->dev_ops;
1836 bs_dest->dev_opaque = bs_src->dev_opaque;
1837 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001838 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001839 bs_dest->copy_on_read = bs_src->copy_on_read;
1840
1841 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1842
Benoît Canetcc0681c2013-09-02 14:14:39 +02001843 /* i/o throttled req */
1844 memcpy(&bs_dest->throttle_state,
1845 &bs_src->throttle_state,
1846 sizeof(ThrottleState));
1847 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1848 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001849 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1850
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001851 /* r/w error */
1852 bs_dest->on_read_error = bs_src->on_read_error;
1853 bs_dest->on_write_error = bs_src->on_write_error;
1854
1855 /* i/o status */
1856 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1857 bs_dest->iostatus = bs_src->iostatus;
1858
1859 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08001860 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001861
Fam Zheng9fcb0252013-08-23 09:14:46 +08001862 /* reference count */
1863 bs_dest->refcnt = bs_src->refcnt;
1864
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001865 /* job */
1866 bs_dest->in_use = bs_src->in_use;
1867 bs_dest->job = bs_src->job;
1868
1869 /* keep the same entry in bdrv_states */
1870 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1871 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01001872 bs_dest->device_list = bs_src->device_list;
1873
1874 /* keep the same entry in graph_bdrv_states
1875 * We do want to swap name but don't want to swap linked list entries
1876 */
1877 bs_dest->node_list = bs_src->node_list;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001878}
1879
1880/*
1881 * Swap bs contents for two image chains while they are live,
1882 * while keeping required fields on the BlockDriverState that is
1883 * actually attached to a device.
1884 *
1885 * This will modify the BlockDriverState fields, and swap contents
1886 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1887 *
1888 * bs_new is required to be anonymous.
1889 *
1890 * This function does not create any image files.
1891 */
1892void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1893{
1894 BlockDriverState tmp;
1895
1896 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1897 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08001898 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001899 assert(bs_new->job == NULL);
1900 assert(bs_new->dev == NULL);
1901 assert(bs_new->in_use == 0);
1902 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001903 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001904
1905 tmp = *bs_new;
1906 *bs_new = *bs_old;
1907 *bs_old = tmp;
1908
1909 /* there are some fields that should not be swapped, move them back */
1910 bdrv_move_feature_fields(&tmp, bs_old);
1911 bdrv_move_feature_fields(bs_old, bs_new);
1912 bdrv_move_feature_fields(bs_new, &tmp);
1913
1914 /* bs_new shouldn't be in bdrv_states even after the swap! */
1915 assert(bs_new->device_name[0] == '\0');
1916
1917 /* Check a few fields that should remain attached to the device */
1918 assert(bs_new->dev == NULL);
1919 assert(bs_new->job == NULL);
1920 assert(bs_new->in_use == 0);
1921 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001922 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001923
1924 bdrv_rebind(bs_new);
1925 bdrv_rebind(bs_old);
1926}
1927
Jeff Cody8802d1f2012-02-28 15:54:06 -05001928/*
1929 * Add new bs contents at the top of an image chain while the chain is
1930 * live, while keeping required fields on the top layer.
1931 *
1932 * This will modify the BlockDriverState fields, and swap contents
1933 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1934 *
Jeff Codyf6801b82012-03-27 16:30:19 -04001935 * bs_new is required to be anonymous.
1936 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05001937 * This function does not create any image files.
1938 */
1939void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1940{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001941 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001942
1943 /* After bdrv_swap(), bs_new holds the former contents of bs_top; make
 1944 * it the backing file of the new top. */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001945 bs_top->backing_hd = bs_new;
1946 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1947 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1948 bs_new->filename);
1949 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1950 bs_new->drv ? bs_new->drv->format_name : "");
Jeff Cody8802d1f2012-02-28 15:54:06 -05001951}
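/*
 * Editor's sketch of a typical bdrv_append() caller (illustrative only): an
 * external snapshot opens a pre-created overlay image, here "overlay.qcow2",
 * whose backing file is the current image, and puts it on top of the live
 * chain. Declarations and full error handling are elided.
 *
 *     BlockDriverState *new_bs = bdrv_new("");
 *     ret = bdrv_open(&new_bs, "overlay.qcow2", NULL, NULL,
 *                     BDRV_O_RDWR | BDRV_O_NO_BACKING, NULL, &local_err);
 *     if (ret == 0) {
 *         bdrv_append(new_bs, bs);
 *         // 'bs' (still attached to its device) now presents the overlay;
 *         // 'new_bs' holds the former top image and is bs's backing file
 *     }
 *
 * new_bs must still be anonymous when it is appended, as asserted in
 * bdrv_swap().
 */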
1952
Fam Zheng4f6fd342013-08-23 09:14:47 +08001953static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00001954{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001955 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001956 assert(!bs->job);
1957 assert(!bs->in_use);
Fam Zheng4f6fd342013-08-23 09:14:47 +08001958 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08001959 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02001960
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02001961 bdrv_close(bs);
1962
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001963 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001964 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001965
Anthony Liguori7267c092011-08-20 22:09:37 -05001966 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001967}
1968
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001969int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1970/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001971{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001972 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001973 return -EBUSY;
1974 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001975 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001976 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001977 return 0;
1978}
1979
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001980/* TODO qdevified devices don't use this, remove when devices are qdevified */
1981void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001982{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001983 if (bdrv_attach_dev(bs, dev) < 0) {
1984 abort();
1985 }
1986}
1987
1988void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1989/* TODO change to DeviceState *dev when all users are qdevified */
1990{
1991 assert(bs->dev == dev);
1992 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001993 bs->dev_ops = NULL;
1994 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001995 bs->guest_block_size = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001996}
1997
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001998/* TODO change to return DeviceState * when all users are qdevified */
1999void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02002000{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002001 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02002002}
2003
Markus Armbruster0e49de52011-08-03 15:07:41 +02002004void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2005 void *opaque)
2006{
2007 bs->dev_ops = ops;
2008 bs->dev_opaque = opaque;
2009}
2010
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002011void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
2012 enum MonitorEvent ev,
2013 BlockErrorAction action, bool is_read)
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002014{
2015 QObject *data;
2016 const char *action_str;
2017
2018 switch (action) {
2019 case BDRV_ACTION_REPORT:
2020 action_str = "report";
2021 break;
2022 case BDRV_ACTION_IGNORE:
2023 action_str = "ignore";
2024 break;
2025 case BDRV_ACTION_STOP:
2026 action_str = "stop";
2027 break;
2028 default:
2029 abort();
2030 }
2031
2032 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2033 bdrv->device_name,
2034 action_str,
2035 is_read ? "read" : "write");
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002036 monitor_protocol_event(ev, data);
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002037
2038 qobject_decref(data);
2039}
2040
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002041static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2042{
2043 QObject *data;
2044
2045 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2046 bdrv_get_device_name(bs), ejected);
2047 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2048
2049 qobject_decref(data);
2050}
2051
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002052static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002053{
Markus Armbruster145feb12011-08-03 15:07:42 +02002054 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002055 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002056 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002057 if (tray_was_closed) {
2058 /* tray open */
2059 bdrv_emit_qmp_eject_event(bs, true);
2060 }
2061 if (load) {
2062 /* tray close */
2063 bdrv_emit_qmp_eject_event(bs, false);
2064 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002065 }
2066}
2067
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002068bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2069{
2070 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2071}
2072
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002073void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2074{
2075 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2076 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2077 }
2078}
2079
Markus Armbrustere4def802011-09-06 18:58:53 +02002080bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2081{
2082 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2083 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2084 }
2085 return false;
2086}
2087
Markus Armbruster145feb12011-08-03 15:07:42 +02002088static void bdrv_dev_resize_cb(BlockDriverState *bs)
2089{
2090 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2091 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002092 }
2093}
2094
Markus Armbrusterf1076392011-09-06 18:58:46 +02002095bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2096{
2097 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2098 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2099 }
2100 return false;
2101}
2102
aliguorie97fc192009-04-21 23:11:50 +00002103/*
2104 * Run consistency checks on an image
2105 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002106 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002107 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002108 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002109 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002110int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002111{
2112 if (bs->drv->bdrv_check == NULL) {
2113 return -ENOTSUP;
2114 }
2115
Kevin Wolfe076f332010-06-29 11:43:13 +02002116 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002117 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002118}
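/*
 * Editor's usage sketch (not part of the original code; the BdrvCheckResult
 * field names below are the editor's assumption based on block.h):
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result, BDRV_FIX_LEAKS | BDRV_FIX_ERRORS);
 *     if (ret < 0) {
 *         // the check itself could not run, e.g. -ENOTSUP
 *     } else if (result.corruptions || result.leaks || result.check_errors) {
 *         // the image is damaged, has leaked clusters, or parts of it
 *         // could not be checked
 *     }
 */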
2119
Kevin Wolf8a426612010-07-16 17:17:01 +02002120#define COMMIT_BUF_SECTORS 2048
2121
bellard33e39632003-07-06 17:15:21 +00002122/* commit COW file into the raw image */
2123int bdrv_commit(BlockDriverState *bs)
2124{
bellard19cb3732006-08-19 11:45:59 +00002125 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002126 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002127 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002128 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002129 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002130 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002131
bellard19cb3732006-08-19 11:45:59 +00002132 if (!drv)
2133 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002134
2135 if (!bs->backing_hd) {
2136 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002137 }
2138
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002139 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2140 return -EBUSY;
2141 }
2142
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002143 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002144 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2145 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002146 open_flags = bs->backing_hd->open_flags;
2147
2148 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002149 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2150 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002151 }
bellard33e39632003-07-06 17:15:21 +00002152 }
bellardea2384d2004-08-01 21:59:26 +00002153
Jeff Cody72706ea2014-01-24 09:02:35 -05002154 length = bdrv_getlength(bs);
2155 if (length < 0) {
2156 ret = length;
2157 goto ro_cleanup;
2158 }
2159
2160 backing_length = bdrv_getlength(bs->backing_hd);
2161 if (backing_length < 0) {
2162 ret = backing_length;
2163 goto ro_cleanup;
2164 }
2165
2166 /* If our top snapshot is larger than the backing file image,
2167 * grow the backing file image if possible. If not possible,
2168 * we must return an error */
2169 if (length > backing_length) {
2170 ret = bdrv_truncate(bs->backing_hd, length);
2171 if (ret < 0) {
2172 goto ro_cleanup;
2173 }
2174 }
2175
2176 total_sectors = length >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05002177 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00002178
Kevin Wolf8a426612010-07-16 17:17:01 +02002179 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002180 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2181 if (ret < 0) {
2182 goto ro_cleanup;
2183 }
2184 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002185 ret = bdrv_read(bs, sector, buf, n);
2186 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002187 goto ro_cleanup;
2188 }
2189
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002190 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2191 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002192 goto ro_cleanup;
2193 }
bellardea2384d2004-08-01 21:59:26 +00002194 }
2195 }
bellard95389c82005-12-18 18:28:15 +00002196
Christoph Hellwig1d449522010-01-17 12:32:30 +01002197 if (drv->bdrv_make_empty) {
2198 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002199 if (ret < 0) {
2200 goto ro_cleanup;
2201 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002202 bdrv_flush(bs);
2203 }
bellard95389c82005-12-18 18:28:15 +00002204
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002205 /*
2206 * Make sure all data we wrote to the backing device is actually
2207 * stable on disk.
2208 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002209 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002210 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002211 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002212
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002213 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002214ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05002215 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002216
2217 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002218 /* ignoring error return here */
2219 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002220 }
2221
Christoph Hellwig1d449522010-01-17 12:32:30 +01002222 return ret;
bellard33e39632003-07-06 17:15:21 +00002223}
2224
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002225int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002226{
2227 BlockDriverState *bs;
2228
Benoît Canetdc364f42014-01-23 21:31:32 +01002229 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Jeff Cody272d2d82013-02-26 09:55:48 -05002230 if (bs->drv && bs->backing_hd) {
2231 int ret = bdrv_commit(bs);
2232 if (ret < 0) {
2233 return ret;
2234 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002235 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002236 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002237 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002238}
2239
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002240/**
2241 * Remove an active request from the tracked requests list
2242 *
2243 * This function should be called when a tracked request is completing.
2244 */
2245static void tracked_request_end(BdrvTrackedRequest *req)
2246{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002247 if (req->serialising) {
2248 req->bs->serialising_in_flight--;
2249 }
2250
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002251 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002252 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002253}
2254
2255/**
2256 * Add an active request to the tracked requests list
2257 */
2258static void tracked_request_begin(BdrvTrackedRequest *req,
2259 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002260 int64_t offset,
2261 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002262{
2263 *req = (BdrvTrackedRequest){
2264 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002265 .offset = offset,
2266 .bytes = bytes,
2267 .is_write = is_write,
2268 .co = qemu_coroutine_self(),
2269 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002270 .overlap_offset = offset,
2271 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002272 };
2273
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002274 qemu_co_queue_init(&req->wait_queue);
2275
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002276 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2277}
2278
Kevin Wolfe96126f2014-02-08 10:42:18 +01002279static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002280{
Kevin Wolf73271452013-12-04 17:08:50 +01002281 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002282 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2283 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002284
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002285 if (!req->serialising) {
2286 req->bs->serialising_in_flight++;
2287 req->serialising = true;
2288 }
Kevin Wolf73271452013-12-04 17:08:50 +01002289
2290 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2291 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002292}
2293
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002294/**
2295 * Round a region to cluster boundaries
2296 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002297void bdrv_round_to_clusters(BlockDriverState *bs,
2298 int64_t sector_num, int nb_sectors,
2299 int64_t *cluster_sector_num,
2300 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002301{
2302 BlockDriverInfo bdi;
2303
2304 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2305 *cluster_sector_num = sector_num;
2306 *cluster_nb_sectors = nb_sectors;
2307 } else {
2308 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2309 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2310 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2311 nb_sectors, c);
2312 }
2313}
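/*
 * Worked example (editor's illustration): with a 64 KiB cluster size,
 * c = 65536 / BDRV_SECTOR_SIZE = 128. A request covering sectors [130, 140)
 * is widened to the cluster-aligned range [128, 256):
 * *cluster_sector_num = QEMU_ALIGN_DOWN(130, 128) = 128 and
 * *cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 10, 128) = 128.
 */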
2314
Kevin Wolf73271452013-12-04 17:08:50 +01002315static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002316{
2317 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002318 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002319
Kevin Wolf73271452013-12-04 17:08:50 +01002320 ret = bdrv_get_info(bs, &bdi);
2321 if (ret < 0 || bdi.cluster_size == 0) {
2322 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002323 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002324 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002325 }
2326}
2327
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002328static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002329 int64_t offset, unsigned int bytes)
2330{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002331 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002332 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002333 return false;
2334 }
2335 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002336 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002337 return false;
2338 }
2339 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002340}
2341
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002342static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002343{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002344 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002345 BdrvTrackedRequest *req;
2346 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002347 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002348
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002349 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002350 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002351 }
2352
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002353 do {
2354 retry = false;
2355 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002356 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002357 continue;
2358 }
Kevin Wolf73271452013-12-04 17:08:50 +01002359 if (tracked_request_overlaps(req, self->overlap_offset,
2360 self->overlap_bytes))
2361 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002362 /* Hitting this means there was a reentrant request, for
2363 * example, a block driver issuing nested requests. This must
2364 * never happen since it means deadlock.
2365 */
2366 assert(qemu_coroutine_self() != req->co);
2367
Kevin Wolf64604402013-12-13 13:04:35 +01002368 /* If the request is already (indirectly) waiting for us, or
2369 * will wait for us as soon as it wakes up, then just go on
2370 * (instead of producing a deadlock in the former case). */
2371 if (!req->waiting_for) {
2372 self->waiting_for = req;
2373 qemu_co_queue_wait(&req->wait_queue);
2374 self->waiting_for = NULL;
2375 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002376 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002377 break;
2378 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002379 }
2380 }
2381 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002382
2383 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002384}
2385
Kevin Wolf756e6732010-01-12 12:55:17 +01002386/*
2387 * Return values:
2388 * 0 - success
2389 * -EINVAL - backing format specified, but no file
2390 * -ENOSPC - can't update the backing file because no space is left in the
2391 * image file header
2392 * -ENOTSUP - format driver doesn't support changing the backing file
2393 */
2394int bdrv_change_backing_file(BlockDriverState *bs,
2395 const char *backing_file, const char *backing_fmt)
2396{
2397 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002398 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002399
Paolo Bonzini5f377792012-04-12 14:01:01 +02002400 /* Backing file format doesn't make sense without a backing file */
2401 if (backing_fmt && !backing_file) {
2402 return -EINVAL;
2403 }
2404
Kevin Wolf756e6732010-01-12 12:55:17 +01002405 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002406 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002407 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002408 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002409 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002410
2411 if (ret == 0) {
2412 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2413 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2414 }
2415 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002416}
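/*
 * Editor's sketch (illustrative; it mirrors the call made in
 * bdrv_drop_intermediate() below): after collapsing part of a backing chain,
 * point the overlay at its new base.
 *
 *     ret = bdrv_change_backing_file(overlay, base->filename,
 *                                    base->drv ? base->drv->format_name : "");
 *     if (ret == -ENOTSUP) {
 *         // the format driver cannot rewrite its header
 *     }
 */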
2417
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002418/*
2419 * Finds the image layer in the chain that has 'bs' as its backing file.
2420 *
2421 * active is the current topmost image.
2422 *
2423 * Returns NULL if bs is not found in active's image chain,
2424 * or if active == bs.
2425 */
2426BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2427 BlockDriverState *bs)
2428{
2429 BlockDriverState *overlay = NULL;
2430 BlockDriverState *intermediate;
2431
2432 assert(active != NULL);
2433 assert(bs != NULL);
2434
2435 /* if bs is the same as active, then by definition it has no overlay
2436 */
2437 if (active == bs) {
2438 return NULL;
2439 }
2440
2441 intermediate = active;
2442 while (intermediate->backing_hd) {
2443 if (intermediate->backing_hd == bs) {
2444 overlay = intermediate;
2445 break;
2446 }
2447 intermediate = intermediate->backing_hd;
2448 }
2449
2450 return overlay;
2451}
2452
2453typedef struct BlkIntermediateStates {
2454 BlockDriverState *bs;
2455 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2456} BlkIntermediateStates;
2457
2458
2459/*
2460 * Drops images above 'base' up to and including 'top', and sets the image
2461 * above 'top' to have base as its backing file.
2462 *
2463 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2464 * information in 'bs' can be properly updated.
2465 *
2466 * E.g., this will convert the following chain:
2467 * bottom <- base <- intermediate <- top <- active
2468 *
2469 * to
2470 *
2471 * bottom <- base <- active
2472 *
2473 * It is allowed for bottom==base, in which case it converts:
2474 *
2475 * base <- intermediate <- top <- active
2476 *
2477 * to
2478 *
2479 * base <- active
2480 *
2481 * Error conditions:
2482 * if active == top, that is considered an error
2483 *
2484 */
2485int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2486 BlockDriverState *base)
2487{
2488 BlockDriverState *intermediate;
2489 BlockDriverState *base_bs = NULL;
2490 BlockDriverState *new_top_bs = NULL;
2491 BlkIntermediateStates *intermediate_state, *next;
2492 int ret = -EIO;
2493
2494 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2495 QSIMPLEQ_INIT(&states_to_delete);
2496
2497 if (!top->drv || !base->drv) {
2498 goto exit;
2499 }
2500
2501 new_top_bs = bdrv_find_overlay(active, top);
2502
2503 if (new_top_bs == NULL) {
2504 /* we could not find the image above 'top', this is an error */
2505 goto exit;
2506 }
2507
2508 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2509 * to do, no intermediate images */
2510 if (new_top_bs->backing_hd == base) {
2511 ret = 0;
2512 goto exit;
2513 }
2514
2515 intermediate = top;
2516
2517 /* now we will go down through the list, and add each BDS we find
2518 * into our deletion queue, until we hit the 'base'
2519 */
2520 while (intermediate) {
2521 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2522 intermediate_state->bs = intermediate;
2523 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2524
2525 if (intermediate->backing_hd == base) {
2526 base_bs = intermediate->backing_hd;
2527 break;
2528 }
2529 intermediate = intermediate->backing_hd;
2530 }
2531 if (base_bs == NULL) {
2532 /* something went wrong, we did not end at the base. safely
2533 * unravel everything, and exit with error */
2534 goto exit;
2535 }
2536
2537 /* success - we can delete the intermediate states, and link top->base */
2538 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2539 base_bs->drv ? base_bs->drv->format_name : "");
2540 if (ret) {
2541 goto exit;
2542 }
2543 new_top_bs->backing_hd = base_bs;
2544
Kevin Wolf355ef4a2013-12-11 20:14:09 +01002545 bdrv_refresh_limits(new_top_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002546
2547 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2548 /* so that bdrv_close() does not recursively close the chain */
2549 intermediate_state->bs->backing_hd = NULL;
Fam Zheng4f6fd342013-08-23 09:14:47 +08002550 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002551 }
2552 ret = 0;
2553
2554exit:
2555 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2556 g_free(intermediate_state);
2557 }
2558 return ret;
2559}
2560
2561
aliguori71d07702009-03-03 17:37:16 +00002562static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2563 size_t size)
2564{
2565 int64_t len;
2566
2567 if (!bdrv_is_inserted(bs))
2568 return -ENOMEDIUM;
2569
2570 if (bs->growable)
2571 return 0;
2572
2573 len = bdrv_getlength(bs);
2574
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002575 if (offset < 0)
2576 return -EIO;
2577
2578 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002579 return -EIO;
2580
2581 return 0;
2582}
2583
2584static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2585 int nb_sectors)
2586{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002587 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2588 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002589}
2590
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002591typedef struct RwCo {
2592 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002593 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002594 QEMUIOVector *qiov;
2595 bool is_write;
2596 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002597 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002598} RwCo;
2599
2600static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2601{
2602 RwCo *rwco = opaque;
2603
2604 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002605 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2606 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002607 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002608 } else {
2609 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2610 rwco->qiov->size, rwco->qiov,
2611 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002612 }
2613}
2614
2615/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002616 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002617 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002618static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2619 QEMUIOVector *qiov, bool is_write,
2620 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002621{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002622 Coroutine *co;
2623 RwCo rwco = {
2624 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002625 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002626 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002627 .is_write = is_write,
2628 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002629 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002630 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002631
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002632 /**
2633 * In sync call context, when the vcpu is blocked, this throttling timer
2634 * will not fire; so the I/O throttling function has to be disabled here
2635 * if it has been enabled.
2636 */
2637 if (bs->io_limits_enabled) {
2638 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2639 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2640 bdrv_io_limits_disable(bs);
2641 }
2642
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002643 if (qemu_in_coroutine()) {
2644 /* Fast-path if already in coroutine context */
2645 bdrv_rw_co_entry(&rwco);
2646 } else {
2647 co = qemu_coroutine_create(bdrv_rw_co_entry);
2648 qemu_coroutine_enter(co, &rwco);
2649 while (rwco.ret == NOT_DONE) {
2650 qemu_aio_wait();
2651 }
2652 }
2653 return rwco.ret;
2654}
2655
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002656/*
2657 * Process a synchronous request using coroutines
2658 */
2659static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002660 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002661{
2662 QEMUIOVector qiov;
2663 struct iovec iov = {
2664 .iov_base = (void *)buf,
2665 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2666 };
2667
2668 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002669 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2670 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002671}
2672
bellard19cb3732006-08-19 11:45:59 +00002673/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002674int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002675 uint8_t *buf, int nb_sectors)
2676{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002677 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002678}
2679
Markus Armbruster07d27a42012-06-29 17:34:29 +02002680/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2681int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2682 uint8_t *buf, int nb_sectors)
2683{
2684 bool enabled;
2685 int ret;
2686
2687 enabled = bs->io_limits_enabled;
2688 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002689 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002690 bs->io_limits_enabled = enabled;
2691 return ret;
2692}
2693
ths5fafdf22007-09-16 21:08:06 +00002694/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002695 -EIO generic I/O error (may happen for all errors)
2696 -ENOMEDIUM No media inserted.
2697 -EINVAL Invalid sector number or nb_sectors
2698 -EACCES Trying to write a read-only device
2699*/
ths5fafdf22007-09-16 21:08:06 +00002700int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002701 const uint8_t *buf, int nb_sectors)
2702{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002703 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002704}
2705
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002706int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2707 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002708{
2709 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002710 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002711}
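/* Illustrative usage sketch (not part of the original file): zero the first
 * 1 MiB of an opened image (2048 sectors of 512 bytes), allowing the driver
 * to unmap the range if it can.  Assumes 'bs' is a valid, writable
 * BlockDriverState.
 *
 *     int ret = bdrv_write_zeroes(bs, 0, 2048, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         error_report("zeroing failed: %s", strerror(-ret));
 *     }
 */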
2712
Peter Lievend75cbb52013-10-24 12:07:03 +02002713/*
2714 * Completely zero out a block device with the help of bdrv_write_zeroes.
2715 * The operation is sped up by checking the block status and only writing
2716 * zeroes to the device if they currently do not return zeroes. Optional
2717 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2718 *
2719 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2720 */
2721int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2722{
2723 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2724 int64_t ret, nb_sectors, sector_num = 0;
2725 int n;
2726
2727 for (;;) {
2728 nb_sectors = target_size - sector_num;
2729 if (nb_sectors <= 0) {
2730 return 0;
2731 }
2732 if (nb_sectors > INT_MAX) {
2733 nb_sectors = INT_MAX;
2734 }
2735 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002736 if (ret < 0) {
2737 error_report("error getting block status at sector %" PRId64 ": %s",
2738 sector_num, strerror(-ret));
2739 return ret;
2740 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002741 if (ret & BDRV_BLOCK_ZERO) {
2742 sector_num += n;
2743 continue;
2744 }
2745 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2746 if (ret < 0) {
2747 error_report("error writing zeroes at sector %" PRId64 ": %s",
2748 sector_num, strerror(-ret));
2749 return ret;
2750 }
2751 sector_num += n;
2752 }
2753}
2754
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002755int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002756{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002757 QEMUIOVector qiov;
2758 struct iovec iov = {
2759 .iov_base = (void *)buf,
2760 .iov_len = bytes,
2761 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002762 int ret;
bellard83f64092006-08-01 16:21:11 +00002763
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002764 if (bytes < 0) {
2765 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002766 }
2767
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002768 qemu_iovec_init_external(&qiov, &iov, 1);
2769 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2770 if (ret < 0) {
2771 return ret;
bellard83f64092006-08-01 16:21:11 +00002772 }
2773
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002774 return bytes;
bellard83f64092006-08-01 16:21:11 +00002775}
2776
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002777int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002778{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002779 int ret;
bellard83f64092006-08-01 16:21:11 +00002780
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002781 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2782 if (ret < 0) {
2783 return ret;
bellard83f64092006-08-01 16:21:11 +00002784 }
2785
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002786 return qiov->size;
2787}
2788
2789int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002790 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002791{
2792 QEMUIOVector qiov;
2793 struct iovec iov = {
2794 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002795 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002796 };
2797
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002798 if (bytes < 0) {
2799 return -EINVAL;
2800 }
2801
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002802 qemu_iovec_init_external(&qiov, &iov, 1);
2803 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002804}
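/* Illustrative usage sketch (not part of the original file): byte-granularity
 * access through bdrv_pread()/bdrv_pwrite(), e.g. reading and updating a
 * small on-disk header.  The 512-byte header and its dirty-flag byte are
 * purely hypothetical; 'bs' is assumed valid and writable.
 *
 *     uint8_t header[512];
 *     int ret = bdrv_pread(bs, 0, header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     header[8] = 1;
 *     ret = bdrv_pwrite(bs, 0, header, sizeof(header));
 *     return ret < 0 ? ret : 0;
 */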
bellard83f64092006-08-01 16:21:11 +00002805
Kevin Wolff08145f2010-06-16 16:38:15 +02002806/*
2807 * Writes to the file and ensures that no writes are reordered across this
2808 * request (acts as a barrier)
2809 *
2810 * Returns 0 on success, -errno in error cases.
2811 */
2812int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2813 const void *buf, int count)
2814{
2815 int ret;
2816
2817 ret = bdrv_pwrite(bs, offset, buf, count);
2818 if (ret < 0) {
2819 return ret;
2820 }
2821
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002822 /* No flush needed for cache modes that already do it */
2823 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002824 bdrv_flush(bs);
2825 }
2826
2827 return 0;
2828}
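/* Illustrative usage sketch (not part of the original file): use
 * bdrv_pwrite_sync() when a metadata update must reach the disk before any
 * later write that depends on it, e.g. bumping a hypothetical on-disk
 * sequence number before writing the data it protects.  'seq_offset',
 * 'data_offset', 'seq' and 'data' are placeholders for a caller-specific
 * layout.
 *
 *     ret = bdrv_pwrite_sync(bs, seq_offset, &seq, sizeof(seq));
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     ret = bdrv_pwrite(bs, data_offset, data, data_len);
 */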
2829
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002830static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002831 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2832{
2833 /* Perform I/O through a temporary buffer so that users who scribble over
2834 * their read buffer while the operation is in progress do not end up
2835 * modifying the image file. This is critical for zero-copy guest I/O
2836 * where anything might happen inside guest memory.
2837 */
2838 void *bounce_buffer;
2839
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002840 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002841 struct iovec iov;
2842 QEMUIOVector bounce_qiov;
2843 int64_t cluster_sector_num;
2844 int cluster_nb_sectors;
2845 size_t skip_bytes;
2846 int ret;
2847
2848 /* Cover the entire cluster so that no additional backing file I/O is required
2849 * when allocating the cluster in the image file.
2850 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002851 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2852 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002853
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002854 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2855 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002856
2857 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2858 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2859 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2860
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002861 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2862 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002863 if (ret < 0) {
2864 goto err;
2865 }
2866
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002867 if (drv->bdrv_co_write_zeroes &&
2868 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002869 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002870 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002871 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002872 /* This does not change the data on the disk, it is not necessary
2873 * to flush even in cache=writethrough mode.
2874 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002875 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002876 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002877 }
2878
Stefan Hajnocziab185922011-11-17 13:40:31 +00002879 if (ret < 0) {
2880 /* It might be okay to ignore write errors for guest requests. If this
2881 * is a deliberate copy-on-read then we don't want to ignore the error.
2882 * Simply report it in all cases.
2883 */
2884 goto err;
2885 }
2886
2887 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002888 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2889 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002890
2891err:
2892 qemu_vfree(bounce_buffer);
2893 return ret;
2894}
2895
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002896/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002897 * Forwards an already correctly aligned request to the BlockDriver. This
2898 * handles copy on read and zeroing after EOF; any other features must be
2899 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002900 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002901static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01002902 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01002903 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002904{
2905 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002906 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002907
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002908 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2909 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002910
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002911 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2912 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2913
2914 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002915 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01002916 /* If we touch the same cluster it counts as an overlap. This
2917 * guarantees that allocating writes will be serialized and not race
2918 * with each other for the same cluster. For example, in copy-on-read
2919 * it ensures that the CoR read and write operations are atomic and
2920 * guest writes cannot interleave between them. */
2921 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002922 }
2923
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002924 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002925
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002926 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002927 int pnum;
2928
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02002929 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002930 if (ret < 0) {
2931 goto out;
2932 }
2933
2934 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002935 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002936 goto out;
2937 }
2938 }
2939
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002940 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002941 if (!(bs->zero_beyond_eof && bs->growable)) {
2942 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2943 } else {
2944 /* Read zeros after EOF of growable BDSes */
2945 int64_t len, total_sectors, max_nb_sectors;
2946
2947 len = bdrv_getlength(bs);
2948 if (len < 0) {
2949 ret = len;
2950 goto out;
2951 }
2952
Fam Zhengd055a1f2013-09-26 19:55:33 +08002953 total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01002954 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2955 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002956 if (max_nb_sectors > 0) {
2957 ret = drv->bdrv_co_readv(bs, sector_num,
2958 MIN(nb_sectors, max_nb_sectors), qiov);
2959 } else {
2960 ret = 0;
2961 }
2962
2963 /* Reading beyond end of file is supposed to produce zeroes */
2964 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
2965 uint64_t offset = MAX(0, total_sectors - sector_num);
2966 uint64_t bytes = (sector_num + nb_sectors - offset) *
2967 BDRV_SECTOR_SIZE;
2968 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2969 }
2970 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00002971
2972out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002973 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002974}
2975
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002976/*
2977 * Handle a read request in coroutine context
2978 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002979static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
2980 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002981 BdrvRequestFlags flags)
2982{
2983 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01002984 BdrvTrackedRequest req;
2985
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002986 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
2987 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
2988 uint8_t *head_buf = NULL;
2989 uint8_t *tail_buf = NULL;
2990 QEMUIOVector local_qiov;
2991 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002992 int ret;
2993
2994 if (!drv) {
2995 return -ENOMEDIUM;
2996 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002997 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002998 return -EIO;
2999 }
3000
3001 if (bs->copy_on_read) {
3002 flags |= BDRV_REQ_COPY_ON_READ;
3003 }
3004
3005 /* throttling disk I/O */
3006 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003007 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003008 }
3009
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003010 /* Align read if necessary by padding qiov */
3011 if (offset & (align - 1)) {
3012 head_buf = qemu_blockalign(bs, align);
3013 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3014 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3015 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3016 use_local_qiov = true;
3017
3018 bytes += offset & (align - 1);
3019 offset = offset & ~(align - 1);
3020 }
3021
3022 if ((offset + bytes) & (align - 1)) {
3023 if (!use_local_qiov) {
3024 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3025 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3026 use_local_qiov = true;
3027 }
3028 tail_buf = qemu_blockalign(bs, align);
3029 qemu_iovec_add(&local_qiov, tail_buf,
3030 align - ((offset + bytes) & (align - 1)));
3031
3032 bytes = ROUND_UP(bytes, align);
3033 }
3034
Kevin Wolf65afd212013-12-03 14:55:55 +01003035 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003036 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003037 use_local_qiov ? &local_qiov : qiov,
3038 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003039 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003040
3041 if (use_local_qiov) {
3042 qemu_iovec_destroy(&local_qiov);
3043 qemu_vfree(head_buf);
3044 qemu_vfree(tail_buf);
3045 }
3046
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003047 return ret;
3048}
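/* Worked example of the padding above (illustrative, not from the original
 * file): with bs->request_alignment == 4096, a read of 2048 bytes at offset
 * 5120 gets a 1024-byte head pad (offset becomes 4096, bytes 3072) and a
 * 1024-byte tail pad (bytes rounded up to 4096), so the driver sees a single
 * aligned 4096-byte read at offset 4096 while the guest buffer receives only
 * the middle 2048 bytes via local_qiov.
 */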
3049
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003050static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3051 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3052 BdrvRequestFlags flags)
3053{
3054 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3055 return -EINVAL;
3056 }
3057
3058 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3059 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3060}
3061
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003062int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003063 int nb_sectors, QEMUIOVector *qiov)
3064{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003065 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003066
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003067 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3068}
3069
3070int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3071 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3072{
3073 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3074
3075 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3076 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003077}
3078
Peter Lievenc31cb702013-10-24 12:06:58 +02003079/* If no limit is specified in the BlockLimits, use a default
3080 * of 32768 512-byte sectors (16 MiB) per request.
3081 */
3082#define MAX_WRITE_ZEROES_DEFAULT 32768
3083
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003084static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003085 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003086{
3087 BlockDriver *drv = bs->drv;
3088 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003089 struct iovec iov = {0};
3090 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003091
Peter Lievenc31cb702013-10-24 12:06:58 +02003092 int max_write_zeroes = bs->bl.max_write_zeroes ?
3093 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003094
Peter Lievenc31cb702013-10-24 12:06:58 +02003095 while (nb_sectors > 0 && !ret) {
3096 int num = nb_sectors;
3097
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003098 /* Align request. Block drivers can expect the "bulk" of the request
3099 * to be aligned.
3100 */
3101 if (bs->bl.write_zeroes_alignment
3102 && num > bs->bl.write_zeroes_alignment) {
3103 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3104 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003105 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003106 num -= sector_num % bs->bl.write_zeroes_alignment;
3107 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3108 /* Shorten the request to the last aligned sector. num cannot
3109 * underflow because num > bs->bl.write_zeroes_alignment.
3110 */
3111 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003112 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003113 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003114
3115 /* limit request size */
3116 if (num > max_write_zeroes) {
3117 num = max_write_zeroes;
3118 }
3119
3120 ret = -ENOTSUP;
3121 /* First try the efficient write zeroes operation */
3122 if (drv->bdrv_co_write_zeroes) {
3123 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3124 }
3125
3126 if (ret == -ENOTSUP) {
3127 /* Fall back to bounce buffer if write zeroes is unsupported */
3128 iov.iov_len = num * BDRV_SECTOR_SIZE;
3129 if (iov.iov_base == NULL) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003130 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3131 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003132 }
3133 qemu_iovec_init_external(&qiov, &iov, 1);
3134
3135 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003136
3137 /* Keep the bounce buffer around if it is big enough for all
3138 * future requests.
3139 */
3140 if (num < max_write_zeroes) {
3141 qemu_vfree(iov.iov_base);
3142 iov.iov_base = NULL;
3143 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003144 }
3145
3146 sector_num += num;
3147 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003148 }
3149
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003150 qemu_vfree(iov.iov_base);
3151 return ret;
3152}
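/* Worked example of the splitting above (illustrative, not from the original
 * file): with bs->bl.write_zeroes_alignment == 8, a request for sectors
 * 5..24 is issued as three driver calls: sectors 5..7 (short head up to the
 * first aligned sector), sectors 8..23 (aligned bulk, shortened to the last
 * aligned sector) and sector 24 (unaligned tail), each additionally capped
 * by max_write_zeroes and falling back to the bounce buffer on -ENOTSUP.
 */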
3153
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003154/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003155 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003156 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003157static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003158 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3159 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003160{
3161 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003162 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003163 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003164
Kevin Wolfb404f722013-12-03 14:02:23 +01003165 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3166 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003167
Kevin Wolfb404f722013-12-03 14:02:23 +01003168 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3169 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003170
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003171 waited = wait_serialising_requests(req);
3172 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003173 assert(req->overlap_offset <= offset);
3174 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003175
Kevin Wolf65afd212013-12-03 14:55:55 +01003176 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003177
3178 if (ret < 0) {
3179 /* Do nothing, write notifier decided to fail this request */
3180 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003181 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003182 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003183 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003184 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003185 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3186 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003187 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003188
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003189 if (ret == 0 && !bs->enable_write_cache) {
3190 ret = bdrv_co_flush(bs);
3191 }
3192
Fam Zhenge4654d22013-11-13 18:29:43 +08003193 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003194
3195 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3196 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3197 }
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003198 if (bs->growable && ret >= 0) {
3199 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3200 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003201
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003202 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003203}
3204
Kevin Wolfb404f722013-12-03 14:02:23 +01003205/*
3206 * Handle a write request in coroutine context
3207 */
Kevin Wolf66015532013-12-03 14:40:18 +01003208static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3209 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003210 BdrvRequestFlags flags)
3211{
Kevin Wolf65afd212013-12-03 14:55:55 +01003212 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003213 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3214 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3215 uint8_t *head_buf = NULL;
3216 uint8_t *tail_buf = NULL;
3217 QEMUIOVector local_qiov;
3218 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003219 int ret;
3220
3221 if (!bs->drv) {
3222 return -ENOMEDIUM;
3223 }
3224 if (bs->read_only) {
3225 return -EACCES;
3226 }
Kevin Wolf66015532013-12-03 14:40:18 +01003227 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003228 return -EIO;
3229 }
3230
Kevin Wolfb404f722013-12-03 14:02:23 +01003231 /* throttling disk I/O */
3232 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003233 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003234 }
3235
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003236 /*
3237 * Align write if necessary by performing a read-modify-write cycle.
3238 * Pad qiov with the read parts and be sure to have a tracked request not
3239 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3240 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003241 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003242
3243 if (offset & (align - 1)) {
3244 QEMUIOVector head_qiov;
3245 struct iovec head_iov;
3246
3247 mark_request_serialising(&req, align);
3248 wait_serialising_requests(&req);
3249
3250 head_buf = qemu_blockalign(bs, align);
3251 head_iov = (struct iovec) {
3252 .iov_base = head_buf,
3253 .iov_len = align,
3254 };
3255 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3256
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003257 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003258 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3259 align, &head_qiov, 0);
3260 if (ret < 0) {
3261 goto fail;
3262 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003263 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003264
3265 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3266 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3267 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3268 use_local_qiov = true;
3269
3270 bytes += offset & (align - 1);
3271 offset = offset & ~(align - 1);
3272 }
3273
3274 if ((offset + bytes) & (align - 1)) {
3275 QEMUIOVector tail_qiov;
3276 struct iovec tail_iov;
3277 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003278 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003279
3280 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003281 waited = wait_serialising_requests(&req);
3282 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003283
3284 tail_buf = qemu_blockalign(bs, align);
3285 tail_iov = (struct iovec) {
3286 .iov_base = tail_buf,
3287 .iov_len = align,
3288 };
3289 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3290
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003291 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003292 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3293 align, &tail_qiov, 0);
3294 if (ret < 0) {
3295 goto fail;
3296 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003297 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003298
3299 if (!use_local_qiov) {
3300 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3301 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3302 use_local_qiov = true;
3303 }
3304
3305 tail_bytes = (offset + bytes) & (align - 1);
3306 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3307
3308 bytes = ROUND_UP(bytes, align);
3309 }
3310
3311 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3312 use_local_qiov ? &local_qiov : qiov,
3313 flags);
3314
3315fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003316 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003317
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003318 if (use_local_qiov) {
3319 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003320 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003321 qemu_vfree(head_buf);
3322 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003323
Kevin Wolfb404f722013-12-03 14:02:23 +01003324 return ret;
3325}
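/* Worked example of the read-modify-write cycle above (illustrative, not
 * from the original file): with bs->request_alignment == 4096, a 512-byte
 * write at offset 5120 reads the surrounding aligned 4096-byte block at
 * offset 4096 (once for the head and once for the tail, which here fall in
 * the same block), merges the guest data into local_qiov and then issues one
 * aligned 4096-byte write.  The RMW reads are serialised against overlapping
 * requests via mark_request_serialising()/wait_serialising_requests().
 */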
3326
Kevin Wolf66015532013-12-03 14:40:18 +01003327static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3328 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3329 BdrvRequestFlags flags)
3330{
3331 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3332 return -EINVAL;
3333 }
3334
3335 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3336 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3337}
3338
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003339int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3340 int nb_sectors, QEMUIOVector *qiov)
3341{
3342 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3343
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003344 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3345}
3346
3347int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003348 int64_t sector_num, int nb_sectors,
3349 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003350{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003351 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003352
Peter Lievend32f35c2013-10-24 12:06:52 +02003353 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3354 flags &= ~BDRV_REQ_MAY_UNMAP;
3355 }
3356
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003357 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003358 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003359}
3360
bellard83f64092006-08-01 16:21:11 +00003361/**
bellard83f64092006-08-01 16:21:11 +00003362 * Truncate file to 'offset' bytes (needed only for file protocols)
3363 */
3364int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3365{
3366 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003367 int ret;
bellard83f64092006-08-01 16:21:11 +00003368 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003369 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003370 if (!drv->bdrv_truncate)
3371 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003372 if (bs->read_only)
3373 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02003374 if (bdrv_in_use(bs))
3375 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003376 ret = drv->bdrv_truncate(bs, offset);
3377 if (ret == 0) {
3378 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003379 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003380 }
3381 return ret;
bellard83f64092006-08-01 16:21:11 +00003382}
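/* Illustrative usage sketch (not part of the original file): grow an image
 * and re-read the resulting length.  'new_size' is a placeholder; note that
 * bdrv_truncate() already refreshes the sector count and notifies the
 * attached device model on success.
 *
 *     int ret = bdrv_truncate(bs, new_size);
 *     if (ret == 0) {
 *         int64_t len = bdrv_getlength(bs);
 *     }
 */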
3383
3384/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003385 * Length of an allocated file in bytes. Sparse files are counted by actual
3386 * allocated space. Return < 0 if error or unknown.
3387 */
3388int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3389{
3390 BlockDriver *drv = bs->drv;
3391 if (!drv) {
3392 return -ENOMEDIUM;
3393 }
3394 if (drv->bdrv_get_allocated_file_size) {
3395 return drv->bdrv_get_allocated_file_size(bs);
3396 }
3397 if (bs->file) {
3398 return bdrv_get_allocated_file_size(bs->file);
3399 }
3400 return -ENOTSUP;
3401}
3402
3403/**
bellard83f64092006-08-01 16:21:11 +00003404 * Length of a file in bytes. Return < 0 if error or unknown.
3405 */
3406int64_t bdrv_getlength(BlockDriverState *bs)
3407{
3408 BlockDriver *drv = bs->drv;
3409 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003410 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003411
Kevin Wolfb94a2612013-10-29 12:18:58 +01003412 if (drv->has_variable_length) {
3413 int ret = refresh_total_sectors(bs, bs->total_sectors);
3414 if (ret < 0) {
3415 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003416 }
bellard83f64092006-08-01 16:21:11 +00003417 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003418 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003419}
3420
bellard19cb3732006-08-19 11:45:59 +00003421/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003422void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003423{
bellard19cb3732006-08-19 11:45:59 +00003424 int64_t length;
3425 length = bdrv_getlength(bs);
3426 if (length < 0)
3427 length = 0;
3428 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01003429 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00003430 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00003431}
bellardcf989512004-02-16 21:56:36 +00003432
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003433void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3434 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003435{
3436 bs->on_read_error = on_read_error;
3437 bs->on_write_error = on_write_error;
3438}
3439
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003440BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003441{
3442 return is_read ? bs->on_read_error : bs->on_write_error;
3443}
3444
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003445BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3446{
3447 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3448
3449 switch (on_err) {
3450 case BLOCKDEV_ON_ERROR_ENOSPC:
3451 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3452 case BLOCKDEV_ON_ERROR_STOP:
3453 return BDRV_ACTION_STOP;
3454 case BLOCKDEV_ON_ERROR_REPORT:
3455 return BDRV_ACTION_REPORT;
3456 case BLOCKDEV_ON_ERROR_IGNORE:
3457 return BDRV_ACTION_IGNORE;
3458 default:
3459 abort();
3460 }
3461}
3462
3463/* This is done by device models because, while the block layer knows
3464 * about the error, it does not know whether an operation comes from
3465 * the device or the block layer (from a job, for example).
3466 */
3467void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3468 bool is_read, int error)
3469{
3470 assert(error >= 0);
Paolo Bonzini32c81a42012-09-28 17:22:58 +02003471 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003472 if (action == BDRV_ACTION_STOP) {
3473 vm_stop(RUN_STATE_IO_ERROR);
3474 bdrv_iostatus_set_err(bs, error);
3475 }
3476}
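/* Illustrative sketch of the device-model pattern described above (not from
 * the original file): on a failed request the device asks which action the
 * configured rerror/werror policy selects and then reports it, completing
 * the guest request with an error only for BDRV_ACTION_REPORT.  'error' is
 * the positive errno of the failure.
 *
 *     BlockErrorAction action = bdrv_get_error_action(bs, is_read, error);
 *     bdrv_error_action(bs, action, is_read, error);
 *     if (action == BDRV_ACTION_REPORT) {
 *         // complete the guest request with an error here
 *     }
 *
 * Handling of BDRV_ACTION_IGNORE and of retrying after BDRV_ACTION_STOP
 * (once the VM is resumed) is device specific.
 */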
3477
bellardb3380822004-03-14 21:38:54 +00003478int bdrv_is_read_only(BlockDriverState *bs)
3479{
3480 return bs->read_only;
3481}
3482
ths985a03b2007-12-24 16:10:43 +00003483int bdrv_is_sg(BlockDriverState *bs)
3484{
3485 return bs->sg;
3486}
3487
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003488int bdrv_enable_write_cache(BlockDriverState *bs)
3489{
3490 return bs->enable_write_cache;
3491}
3492
Paolo Bonzini425b0142012-06-06 00:04:52 +02003493void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3494{
3495 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003496
3497 /* so a reopen() will preserve wce */
3498 if (wce) {
3499 bs->open_flags |= BDRV_O_CACHE_WB;
3500 } else {
3501 bs->open_flags &= ~BDRV_O_CACHE_WB;
3502 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003503}
3504
bellardea2384d2004-08-01 21:59:26 +00003505int bdrv_is_encrypted(BlockDriverState *bs)
3506{
3507 if (bs->backing_hd && bs->backing_hd->encrypted)
3508 return 1;
3509 return bs->encrypted;
3510}
3511
aliguoric0f4ce72009-03-05 23:01:01 +00003512int bdrv_key_required(BlockDriverState *bs)
3513{
3514 BlockDriverState *backing_hd = bs->backing_hd;
3515
3516 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3517 return 1;
3518 return (bs->encrypted && !bs->valid_key);
3519}
3520
bellardea2384d2004-08-01 21:59:26 +00003521int bdrv_set_key(BlockDriverState *bs, const char *key)
3522{
3523 int ret;
3524 if (bs->backing_hd && bs->backing_hd->encrypted) {
3525 ret = bdrv_set_key(bs->backing_hd, key);
3526 if (ret < 0)
3527 return ret;
3528 if (!bs->encrypted)
3529 return 0;
3530 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003531 if (!bs->encrypted) {
3532 return -EINVAL;
3533 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3534 return -ENOMEDIUM;
3535 }
aliguoric0f4ce72009-03-05 23:01:01 +00003536 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003537 if (ret < 0) {
3538 bs->valid_key = 0;
3539 } else if (!bs->valid_key) {
3540 bs->valid_key = 1;
3541 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003542 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003543 }
aliguoric0f4ce72009-03-05 23:01:01 +00003544 return ret;
bellardea2384d2004-08-01 21:59:26 +00003545}
3546
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003547const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003548{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003549 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003550}
3551
ths5fafdf22007-09-16 21:08:06 +00003552void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003553 void *opaque)
3554{
3555 BlockDriver *drv;
3556
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003557 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00003558 it(opaque, drv->format_name);
3559 }
3560}
3561
Benoît Canetdc364f42014-01-23 21:31:32 +01003562/* Find a block backend (top-level BlockDriverState) by its device name */
bellardb3380822004-03-14 21:38:54 +00003563BlockDriverState *bdrv_find(const char *name)
3564{
3565 BlockDriverState *bs;
3566
Benoît Canetdc364f42014-01-23 21:31:32 +01003567 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003568 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003569 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003570 }
bellardb3380822004-03-14 21:38:54 +00003571 }
3572 return NULL;
3573}
3574
Benoît Canetdc364f42014-01-23 21:31:32 +01003575/* Find a node in the graph of BlockDriverStates by its node name */
3576BlockDriverState *bdrv_find_node(const char *node_name)
3577{
3578 BlockDriverState *bs;
3579
3580 assert(node_name);
3581
3582 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3583 if (!strcmp(node_name, bs->node_name)) {
3584 return bs;
3585 }
3586 }
3587 return NULL;
3588}
3589
Benoît Canetc13163f2014-01-23 21:31:34 +01003590/* Put this QMP function here so it can access the static graph_bdrv_states. */
3591BlockDeviceInfoList *bdrv_named_nodes_list(void)
3592{
3593 BlockDeviceInfoList *list, *entry;
3594 BlockDriverState *bs;
3595
3596 list = NULL;
3597 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3598 entry = g_malloc0(sizeof(*entry));
3599 entry->value = bdrv_block_device_info(bs);
3600 entry->next = list;
3601 list = entry;
3602 }
3603
3604 return list;
3605}
3606
Benoît Canet12d3ba82014-01-23 21:31:35 +01003607BlockDriverState *bdrv_lookup_bs(const char *device,
3608 const char *node_name,
3609 Error **errp)
3610{
3611 BlockDriverState *bs = NULL;
3612
Benoît Canet12d3ba82014-01-23 21:31:35 +01003613 if (device) {
3614 bs = bdrv_find(device);
3615
Benoît Canetdd67fa52014-02-12 17:15:06 +01003616 if (bs) {
3617 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003618 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003619 }
3620
Benoît Canetdd67fa52014-02-12 17:15:06 +01003621 if (node_name) {
3622 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003623
Benoît Canetdd67fa52014-02-12 17:15:06 +01003624 if (bs) {
3625 return bs;
3626 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003627 }
3628
Benoît Canetdd67fa52014-02-12 17:15:06 +01003629 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3630 device ? device : "",
3631 node_name ? node_name : "");
3632 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003633}
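/* Illustrative usage sketch (not part of the original file): resolve a BDS
 * by device name or node name and propagate the error otherwise, as a QMP
 * command handler might.  'has_device'/'has_node_name' stand in for the
 * caller's optional-argument handling.
 *
 *     Error *local_err = NULL;
 *     BlockDriverState *bs = bdrv_lookup_bs(has_device ? device : NULL,
 *                                           has_node_name ? node_name : NULL,
 *                                           &local_err);
 *     if (!bs) {
 *         error_propagate(errp, local_err);
 *         return;
 *     }
 */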
3634
Markus Armbruster2f399b02010-06-02 18:55:20 +02003635BlockDriverState *bdrv_next(BlockDriverState *bs)
3636{
3637 if (!bs) {
3638 return QTAILQ_FIRST(&bdrv_states);
3639 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003640 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003641}
3642
aliguori51de9762009-03-05 23:00:43 +00003643void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003644{
3645 BlockDriverState *bs;
3646
Benoît Canetdc364f42014-01-23 21:31:32 +01003647 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003648 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003649 }
3650}
3651
bellardea2384d2004-08-01 21:59:26 +00003652const char *bdrv_get_device_name(BlockDriverState *bs)
3653{
3654 return bs->device_name;
3655}
3656
Markus Armbrusterc8433282012-06-05 16:49:24 +02003657int bdrv_get_flags(BlockDriverState *bs)
3658{
3659 return bs->open_flags;
3660}
3661
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003662int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003663{
3664 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003665 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003666
Benoît Canetdc364f42014-01-23 21:31:32 +01003667 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003668 int ret = bdrv_flush(bs);
3669 if (ret < 0 && !result) {
3670 result = ret;
3671 }
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003672 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003673
3674 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003675}
3676
Peter Lieven3ac21622013-06-28 12:47:42 +02003677int bdrv_has_zero_init_1(BlockDriverState *bs)
3678{
3679 return 1;
3680}
3681
Kevin Wolff2feebb2010-04-14 17:30:35 +02003682int bdrv_has_zero_init(BlockDriverState *bs)
3683{
3684 assert(bs->drv);
3685
Paolo Bonzini11212d82013-09-04 19:00:27 +02003686 /* If BS is a copy on write image, it is initialized to
3687 the contents of the base image, which may not be zeroes. */
3688 if (bs->backing_hd) {
3689 return 0;
3690 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003691 if (bs->drv->bdrv_has_zero_init) {
3692 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003693 }
3694
Peter Lieven3ac21622013-06-28 12:47:42 +02003695 /* safe default */
3696 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003697}
3698
Peter Lieven4ce78692013-10-24 12:06:54 +02003699bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3700{
3701 BlockDriverInfo bdi;
3702
3703 if (bs->backing_hd) {
3704 return false;
3705 }
3706
3707 if (bdrv_get_info(bs, &bdi) == 0) {
3708 return bdi.unallocated_blocks_are_zero;
3709 }
3710
3711 return false;
3712}
3713
3714bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3715{
3716 BlockDriverInfo bdi;
3717
3718 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3719 return false;
3720 }
3721
3722 if (bdrv_get_info(bs, &bdi) == 0) {
3723 return bdi.can_write_zeroes_with_unmap;
3724 }
3725
3726 return false;
3727}
3728
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003729typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003730 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01003731 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003732 int64_t sector_num;
3733 int nb_sectors;
3734 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003735 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003736 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003737} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003738
thsf58c7b32008-06-05 21:53:49 +00003739/*
3740 * Returns the allocation status of the specified sectors. Drivers
3741 * not implementing the functionality are assumed to not support backing files,
3742 * hence all their sectors are reported as allocated.
3743 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003744 * If 'sector_num' is beyond the end of the disk image the return value is 0
3745 * and 'pnum' is set to 0.
3746 *
thsf58c7b32008-06-05 21:53:49 +00003747 * 'pnum' is set to the number of sectors (including and immediately following
3748 * the specified sector) that are known to be in the same
3749 * allocated/unallocated state.
3750 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003751 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3752 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003753 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003754static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3755 int64_t sector_num,
3756 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00003757{
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003758 int64_t length;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003759 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003760 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003761
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003762 length = bdrv_getlength(bs);
3763 if (length < 0) {
3764 return length;
3765 }
3766
3767 if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003768 *pnum = 0;
3769 return 0;
3770 }
3771
3772 n = bs->total_sectors - sector_num;
3773 if (n < nb_sectors) {
3774 nb_sectors = n;
3775 }
3776
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003777 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003778 *pnum = nb_sectors;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02003779 ret = BDRV_BLOCK_DATA;
3780 if (bs->drv->protocol_name) {
3781 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3782 }
3783 return ret;
thsf58c7b32008-06-05 21:53:49 +00003784 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003785
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003786 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3787 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02003788 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003789 return ret;
3790 }
3791
Peter Lieven92bc50a2013-10-08 14:43:14 +02003792 if (ret & BDRV_BLOCK_RAW) {
3793 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3794 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3795 *pnum, pnum);
3796 }
3797
Peter Lievenc3d86882013-10-24 12:07:04 +02003798 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3799 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003800 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02003801 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003802 BlockDriverState *bs2 = bs->backing_hd;
3803 int64_t length2 = bdrv_getlength(bs2);
3804 if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3805 ret |= BDRV_BLOCK_ZERO;
3806 }
3807 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003808 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003809
3810 if (bs->file &&
3811 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
3812 (ret & BDRV_BLOCK_OFFSET_VALID)) {
3813 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3814 *pnum, pnum);
3815 if (ret2 >= 0) {
3816 /* Ignore errors. This is just providing extra information, it
3817 * is useful but not necessary.
3818 */
3819 ret |= (ret2 & BDRV_BLOCK_ZERO);
3820 }
3821 }
3822
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003823 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003824}
3825
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003826/* Coroutine wrapper for bdrv_get_block_status() */
3827static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003828{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003829 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003830 BlockDriverState *bs = data->bs;
3831
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003832 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3833 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003834 data->done = true;
3835}
3836
3837/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003838 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003839 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003840 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003841 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003842int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3843 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003844{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003845 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003846 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003847 .bs = bs,
3848 .sector_num = sector_num,
3849 .nb_sectors = nb_sectors,
3850 .pnum = pnum,
3851 .done = false,
3852 };
3853
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003854 if (qemu_in_coroutine()) {
3855 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003856 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003857 } else {
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003858 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003859 qemu_coroutine_enter(co, &data);
3860 while (!data.done) {
3861 qemu_aio_wait();
3862 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003863 }
3864 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00003865}
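/* Illustrative usage sketch (not part of the original file): walk an image
 * and classify each extent via the BDRV_BLOCK_* flags returned above.
 * Error handling for bdrv_getlength() is omitted for brevity.
 *
 *     int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 *     int64_t sector_num = 0;
 *     while (sector_num < total) {
 *         int pnum;
 *         int64_t ret = bdrv_get_block_status(bs, sector_num,
 *                                             MIN(total - sector_num, INT_MAX),
 *                                             &pnum);
 *         if (ret < 0) {
 *             break;
 *         }
 *         if (ret & BDRV_BLOCK_ZERO) {
 *             // the range reads as zeroes
 *         } else if (ret & BDRV_BLOCK_DATA) {
 *             // data is present in this layer
 *         }
 *         sector_num += pnum;
 *     }
 */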
3866
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003867int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3868 int nb_sectors, int *pnum)
3869{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02003870 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3871 if (ret < 0) {
3872 return ret;
3873 }
3874 return
3875 (ret & BDRV_BLOCK_DATA) ||
3876 ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003877}
3878
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003879/*
3880 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3881 *
3882 * Return true if the given sector is allocated in any image between
3883 * BASE and TOP (inclusive). BASE can be NULL to check if the given
3884 * sector is allocated in any image of the chain. Return false otherwise.
3885 *
3886 * 'pnum' is set to the number of sectors (including and immediately following
3887 * the specified sector) that are known to be in the same
3888 * allocated/unallocated state.
3889 *
3890 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02003891int bdrv_is_allocated_above(BlockDriverState *top,
3892 BlockDriverState *base,
3893 int64_t sector_num,
3894 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003895{
3896 BlockDriverState *intermediate;
3897 int ret, n = nb_sectors;
3898
3899 intermediate = top;
3900 while (intermediate && intermediate != base) {
3901 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003902 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3903 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003904 if (ret < 0) {
3905 return ret;
3906 } else if (ret) {
3907 *pnum = pnum_inter;
3908 return 1;
3909 }
3910
3911 /*
3912 * [sector_num, nb_sectors] is unallocated on top but intermediate
3913 * might have
3914 *
3915 * [sector_num+x, nr_sectors] allocated.
3916 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08003917 if (n > pnum_inter &&
3918 (intermediate == top ||
3919 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003920 n = pnum_inter;
3921 }
3922
3923 intermediate = intermediate->backing_hd;
3924 }
3925
3926 *pnum = n;
3927 return 0;
3928}
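/* Illustrative usage sketch (not part of the original file): check whether a
 * range of 'top' is backed by data somewhere above 'base', as a stream or
 * commit job might before deciding to copy it.
 *
 *     int pnum;
 *     int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors,
 *                                       &pnum);
 *     if (ret < 0) {
 *         return ret;
 *     } else if (ret) {
 *         // the first 'pnum' sectors are allocated between base and top
 *     }
 */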
3929
aliguori045df332009-03-05 23:00:48 +00003930const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3931{
3932 if (bs->backing_hd && bs->backing_hd->encrypted)
3933 return bs->backing_file;
3934 else if (bs->encrypted)
3935 return bs->filename;
3936 else
3937 return NULL;
3938}
3939
ths5fafdf22007-09-16 21:08:06 +00003940void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00003941 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00003942{
Kevin Wolf3574c602011-10-26 11:02:11 +02003943 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00003944}
3945
ths5fafdf22007-09-16 21:08:06 +00003946int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00003947 const uint8_t *buf, int nb_sectors)
3948{
3949 BlockDriver *drv = bs->drv;
3950 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003951 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003952 if (!drv->bdrv_write_compressed)
3953 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02003954 if (bdrv_check_request(bs, sector_num, nb_sectors))
3955 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003956
Fam Zhenge4654d22013-11-13 18:29:43 +08003957 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003958
bellardfaea38e2006-08-05 21:31:00 +00003959 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3960}
ths3b46e622007-09-17 08:09:54 +00003961
bellardfaea38e2006-08-05 21:31:00 +00003962int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3963{
3964 BlockDriver *drv = bs->drv;
3965 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003966 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003967 if (!drv->bdrv_get_info)
3968 return -ENOTSUP;
3969 memset(bdi, 0, sizeof(*bdi));
3970 return drv->bdrv_get_info(bs, bdi);
3971}
3972
Max Reitzeae041f2013-10-09 10:46:16 +02003973ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3974{
3975 BlockDriver *drv = bs->drv;
3976 if (drv && drv->bdrv_get_specific_info) {
3977 return drv->bdrv_get_specific_info(bs);
3978 }
3979 return NULL;
3980}
3981
Christoph Hellwig45566e92009-07-10 23:11:57 +02003982int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
3983 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00003984{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003985 QEMUIOVector qiov;
3986 struct iovec iov = {
3987 .iov_base = (void *) buf,
3988 .iov_len = size,
3989 };
3990
3991 qemu_iovec_init_external(&qiov, &iov, 1);
3992 return bdrv_writev_vmstate(bs, &qiov, pos);
3993}
3994
3995int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
3996{
aliguori178e08a2009-04-05 19:10:55 +00003997 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003998
3999 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004000 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004001 } else if (drv->bdrv_save_vmstate) {
4002 return drv->bdrv_save_vmstate(bs, qiov, pos);
4003 } else if (bs->file) {
4004 return bdrv_writev_vmstate(bs->file, qiov, pos);
4005 }
4006
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004007 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004008}
4009
Christoph Hellwig45566e92009-07-10 23:11:57 +02004010int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4011 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004012{
4013 BlockDriver *drv = bs->drv;
4014 if (!drv)
4015 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004016 if (drv->bdrv_load_vmstate)
4017 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4018 if (bs->file)
4019 return bdrv_load_vmstate(bs->file, buf, pos, size);
4020 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004021}
4022
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004023void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4024{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004025 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004026 return;
4027 }
4028
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004029 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004030}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004031
Kevin Wolf41c695c2012-12-06 14:32:58 +01004032int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4033 const char *tag)
4034{
4035 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4036 bs = bs->file;
4037 }
4038
4039 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4040 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4041 }
4042
4043 return -ENOTSUP;
4044}
4045
Fam Zheng4cc70e92013-11-20 10:01:54 +08004046int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4047{
4048 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4049 bs = bs->file;
4050 }
4051
4052 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4053 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4054 }
4055
4056 return -ENOTSUP;
4057}
4058
Kevin Wolf41c695c2012-12-06 14:32:58 +01004059int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4060{
4061 while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
4062 bs = bs->file;
4063 }
4064
4065 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4066 return bs->drv->bdrv_debug_resume(bs, tag);
4067 }
4068
4069 return -ENOTSUP;
4070}
4071
4072bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4073{
4074 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4075 bs = bs->file;
4076 }
4077
4078 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4079 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4080 }
4081
4082 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004083}
4084
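/*
 * Illustrative sketch: driving the breakpoint helpers above from test code.
 * It assumes a blkdebug node somewhere in the bs chain; the event name
 * "read_aio" and the tag "tag0" are example values.
 *
 *     static void debug_breakpoint_example(BlockDriverState *bs)
 *     {
 *         if (bdrv_debug_breakpoint(bs, "read_aio", "tag0") == 0) {
 *             // ... submit I/O that triggers the event ...
 *             if (bdrv_debug_is_suspended(bs, "tag0")) {
 *                 bdrv_debug_resume(bs, "tag0");
 *             }
 *             bdrv_debug_remove_breakpoint(bs, "tag0");
 *         }
 *     }
 */
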
Blue Swirl199630b2010-07-25 20:49:34 +00004085int bdrv_is_snapshot(BlockDriverState *bs)
4086{
4087 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4088}
4089
Jeff Codyb1b1d782012-10-16 15:49:09 -04004090/* backing_file can be relative, absolute, or a protocol. If it is relative,
4091 * it must be relative to the backing chain. So do not pass in bs->filename
4092 * from a BDS as backing_file, as that may be relative to the CWD rather
4093 * than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004094BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4095 const char *backing_file)
4096{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004097 char *filename_full = NULL;
4098 char *backing_file_full = NULL;
4099 char *filename_tmp = NULL;
4100 int is_protocol = 0;
4101 BlockDriverState *curr_bs = NULL;
4102 BlockDriverState *retval = NULL;
4103
4104 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004105 return NULL;
4106 }
4107
Jeff Codyb1b1d782012-10-16 15:49:09 -04004108 filename_full = g_malloc(PATH_MAX);
4109 backing_file_full = g_malloc(PATH_MAX);
4110 filename_tmp = g_malloc(PATH_MAX);
4111
4112 is_protocol = path_has_protocol(backing_file);
4113
4114 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4115
4116 /* If either of the filename paths is actually a protocol, then
4117 * compare unmodified paths; otherwise make paths relative */
4118 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4119 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4120 retval = curr_bs->backing_hd;
4121 break;
4122 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004123 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004124 /* If not an absolute filename path, make it relative to the current
4125 * image's filename path */
4126 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4127 backing_file);
4128
4129 /* We are going to compare absolute pathnames */
4130 if (!realpath(filename_tmp, filename_full)) {
4131 continue;
4132 }
4133
4134 /* We need to make sure the backing filename we are comparing against
4135 * is relative to the current image filename (or absolute) */
4136 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4137 curr_bs->backing_file);
4138
4139 if (!realpath(filename_tmp, backing_file_full)) {
4140 continue;
4141 }
4142
4143 if (strcmp(backing_file_full, filename_full) == 0) {
4144 retval = curr_bs->backing_hd;
4145 break;
4146 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004147 }
4148 }
4149
Jeff Codyb1b1d782012-10-16 15:49:09 -04004150 g_free(filename_full);
4151 g_free(backing_file_full);
4152 g_free(filename_tmp);
4153 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004154}
4155
Benoît Canetf198fd12012-08-02 10:22:47 +02004156int bdrv_get_backing_file_depth(BlockDriverState *bs)
4157{
4158 if (!bs->drv) {
4159 return 0;
4160 }
4161
4162 if (!bs->backing_hd) {
4163 return 0;
4164 }
4165
4166 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4167}
4168
Jeff Cody79fac562012-09-27 13:29:15 -04004169BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4170{
4171 BlockDriverState *curr_bs = NULL;
4172
4173 if (!bs) {
4174 return NULL;
4175 }
4176
4177 curr_bs = bs;
4178
4179 while (curr_bs->backing_hd) {
4180 curr_bs = curr_bs->backing_hd;
4181 }
4182 return curr_bs;
4183}
4184
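/*
 * Illustrative sketch: walking a backing chain with the helpers above.  The
 * backing file name "base.qcow2" is a hypothetical example.
 *
 *     static void backing_chain_example(BlockDriverState *bs)
 *     {
 *         BlockDriverState *base = bdrv_find_base(bs);
 *         BlockDriverState *backing = bdrv_find_backing_image(bs, "base.qcow2");
 *         int depth = bdrv_get_backing_file_depth(bs);
 *
 *         fprintf(stderr, "chain depth %d, base %p, matched backing image %p\n",
 *                 depth, (void *)base, (void *)backing);
 *     }
 */
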
bellard83f64092006-08-01 16:21:11 +00004185/**************************************************************/
4186/* async I/Os */
4187
aliguori3b69e4b2009-01-22 16:59:24 +00004188BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004189 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004190 BlockDriverCompletionFunc *cb, void *opaque)
4191{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004192 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4193
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004194 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004195 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004196}
4197
aliguorif141eaf2009-04-07 18:43:24 +00004198BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4199 QEMUIOVector *qiov, int nb_sectors,
4200 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004201{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004202 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4203
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004204 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004205 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004206}
4207
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004208BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4209 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4210 BlockDriverCompletionFunc *cb, void *opaque)
4211{
4212 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4213
4214 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4215 BDRV_REQ_ZERO_WRITE | flags,
4216 cb, opaque, true);
4217}
4218
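/*
 * Illustrative sketch: submitting a single-buffer asynchronous read with
 * bdrv_aio_readv().  The callback, the sector range and the buffer handling
 * are hypothetical; a real caller keeps buf and the qiov alive until the
 * completion callback has run.
 *
 *     static void example_read_done(void *opaque, int ret)
 *     {
 *         QEMUIOVector *qiov = opaque;
 *         // ret is 0 on success, a negative errno value on failure
 *         qemu_iovec_destroy(qiov);
 *         g_free(qiov);
 *     }
 *
 *     static void aio_read_example(BlockDriverState *bs, uint8_t *buf)
 *     {
 *         QEMUIOVector *qiov = g_new0(QEMUIOVector, 1);
 *
 *         qemu_iovec_init(qiov, 1);
 *         qemu_iovec_add(qiov, buf, 8 * BDRV_SECTOR_SIZE);
 *         bdrv_aio_readv(bs, 0, qiov, 8, example_read_done, qiov);
 *     }
 */
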
Kevin Wolf40b4f532009-09-09 17:53:37 +02004219
4220typedef struct MultiwriteCB {
4221 int error;
4222 int num_requests;
4223 int num_callbacks;
4224 struct {
4225 BlockDriverCompletionFunc *cb;
4226 void *opaque;
4227 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004228 } callbacks[];
4229} MultiwriteCB;
4230
4231static void multiwrite_user_cb(MultiwriteCB *mcb)
4232{
4233 int i;
4234
4235 for (i = 0; i < mcb->num_callbacks; i++) {
4236 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004237 if (mcb->callbacks[i].free_qiov) {
4238 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4239 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004240 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004241 }
4242}
4243
4244static void multiwrite_cb(void *opaque, int ret)
4245{
4246 MultiwriteCB *mcb = opaque;
4247
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004248 trace_multiwrite_cb(mcb, ret);
4249
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004250 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004251 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004252 }
4253
4254 mcb->num_requests--;
4255 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004256 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004257 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004258 }
4259}
4260
4261static int multiwrite_req_compare(const void *a, const void *b)
4262{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004263 const BlockRequest *req1 = a, *req2 = b;
4264
4265 /*
4266 * Note that we can't simply subtract req2->sector from req1->sector
4267 * here as that could overflow the return value.
4268 */
4269 if (req1->sector > req2->sector) {
4270 return 1;
4271 } else if (req1->sector < req2->sector) {
4272 return -1;
4273 } else {
4274 return 0;
4275 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004276}
4277
4278/*
4279 * Takes a bunch of requests and tries to merge them. Returns the number of
4280 * requests that remain after merging.
4281 */
4282static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4283 int num_reqs, MultiwriteCB *mcb)
4284{
4285 int i, outidx;
4286
4287 // Sort requests by start sector
4288 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4289
4290 // Check if adjacent requests overlap or are exactly sequential. If so,
4291 // combine them into a single request.
4292 outidx = 0;
4293 for (i = 1; i < num_reqs; i++) {
4294 int merge = 0;
4295 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4296
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004297 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004298 if (reqs[i].sector <= oldreq_last) {
4299 merge = 1;
4300 }
4301
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004302 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4303 merge = 0;
4304 }
4305
Kevin Wolf40b4f532009-09-09 17:53:37 +02004306 if (merge) {
4307 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004308 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004309 qemu_iovec_init(qiov,
4310 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4311
4312 // Add the first request to the merged one. If the requests are
4313 // overlapping, drop the last sectors of the first request.
4314 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004315 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004316
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004317 // We should not need to add any zeros between the two requests
4318 assert(reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004319
4320 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004321 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004322
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004323 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004324 reqs[outidx].qiov = qiov;
4325
4326 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4327 } else {
4328 outidx++;
4329 reqs[outidx].sector = reqs[i].sector;
4330 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4331 reqs[outidx].qiov = reqs[i].qiov;
4332 }
4333 }
4334
4335 return outidx + 1;
4336}
4337
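/*
 * Worked example for the merge above: with A = { sector 0, 8 sectors } and
 * B = { sector 4, 8 sectors }, B starts at or before oldreq_last = 8, so the
 * two are merged.  The merged qiov takes the first (4 - 0) << 9 = 2048 bytes
 * of A's qiov followed by all 4096 bytes of B's qiov, i.e. a single request
 * of 6144 >> 9 = 12 sectors starting at sector 0.
 */
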
4338/*
4339 * Submit multiple AIO write requests at once.
4340 *
4341 * On success, the function returns 0 and all requests in the reqs array have
4342 * been submitted. On error, this function returns -1 and any of the requests
4343 * may or may not have been submitted. In particular, the callback may be
4344 * invoked for some requests but not for others. The caller must check the
4345 * error field of each BlockRequest to know which callbacks to wait for
4346 * (if error != 0, no callback will be invoked for that request).
4347 *
4348 * The implementation may modify the contents of the reqs array, e.g. to merge
4349 * requests. However, the fields opaque and error are left unmodified as they
4350 * are used to signal failure for a single request to the caller.
4351 */
4352int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4353{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004354 MultiwriteCB *mcb;
4355 int i;
4356
Ryan Harper301db7c2011-03-07 10:01:04 -06004357 /* don't submit writes if we don't have a medium */
4358 if (bs->drv == NULL) {
4359 for (i = 0; i < num_reqs; i++) {
4360 reqs[i].error = -ENOMEDIUM;
4361 }
4362 return -1;
4363 }
4364
Kevin Wolf40b4f532009-09-09 17:53:37 +02004365 if (num_reqs == 0) {
4366 return 0;
4367 }
4368
4369 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004370 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004371 mcb->num_requests = 0;
4372 mcb->num_callbacks = num_reqs;
4373
4374 for (i = 0; i < num_reqs; i++) {
4375 mcb->callbacks[i].cb = reqs[i].cb;
4376 mcb->callbacks[i].opaque = reqs[i].opaque;
4377 }
4378
4379 // Check for mergeable requests
4380 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4381
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004382 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4383
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004384 /* Run the aio requests. */
4385 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004386 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004387 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4388 reqs[i].nb_sectors, reqs[i].flags,
4389 multiwrite_cb, mcb,
4390 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004391 }
4392
4393 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004394}
4395
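/*
 * Illustrative sketch: batching two writes through bdrv_aio_multiwrite().
 * The sector numbers, the completion callback and the qiovs (each covering
 * 8 sectors here) are hypothetical; the qiovs must stay valid until the
 * callbacks have run.
 *
 *     static void multiwrite_example(BlockDriverState *bs,
 *                                    QEMUIOVector *qiov1, QEMUIOVector *qiov2,
 *                                    BlockDriverCompletionFunc *cb)
 *     {
 *         BlockRequest reqs[2] = {
 *             { .sector = 0,  .nb_sectors = 8, .qiov = qiov1,
 *               .cb = cb, .opaque = NULL },
 *             { .sector = 16, .nb_sectors = 8, .qiov = qiov2,
 *               .cb = cb, .opaque = NULL },
 *         };
 *
 *         if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *             // inspect reqs[i].error to see which callbacks will still run
 *         }
 *     }
 */
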
bellard83f64092006-08-01 16:21:11 +00004396void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004397{
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004398 acb->aiocb_info->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00004399}
4400
4401/**************************************************************/
4402/* async block device emulation */
4403
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004404typedef struct BlockDriverAIOCBSync {
4405 BlockDriverAIOCB common;
4406 QEMUBH *bh;
4407 int ret;
4408 /* vector translation state */
4409 QEMUIOVector *qiov;
4410 uint8_t *bounce;
4411 int is_write;
4412} BlockDriverAIOCBSync;
4413
4414static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4415{
Kevin Wolfb666d232010-05-05 11:44:39 +02004416 BlockDriverAIOCBSync *acb =
4417 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03004418 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004419 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004420 qemu_aio_release(acb);
4421}
4422
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004423static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004424 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4425 .cancel = bdrv_aio_cancel_em,
4426};
4427
bellard83f64092006-08-01 16:21:11 +00004428static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004429{
pbrookce1a14d2006-08-07 02:38:06 +00004430 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004431
aliguorif141eaf2009-04-07 18:43:24 +00004432 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04004433 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00004434 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004435 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004436 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004437 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00004438 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00004439}
bellardbeac80c2006-06-26 20:08:57 +00004440
aliguorif141eaf2009-04-07 18:43:24 +00004441static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4442 int64_t sector_num,
4443 QEMUIOVector *qiov,
4444 int nb_sectors,
4445 BlockDriverCompletionFunc *cb,
4446 void *opaque,
4447 int is_write)
4448
bellardea2384d2004-08-01 21:59:26 +00004449{
pbrookce1a14d2006-08-07 02:38:06 +00004450 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004451
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004452 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004453 acb->is_write = is_write;
4454 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00004455 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01004456 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004457
4458 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004459 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004460 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004461 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004462 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004463 }
4464
pbrookce1a14d2006-08-07 02:38:06 +00004465 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004466
pbrookce1a14d2006-08-07 02:38:06 +00004467 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004468}
4469
aliguorif141eaf2009-04-07 18:43:24 +00004470static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4471 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004472 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004473{
aliguorif141eaf2009-04-07 18:43:24 +00004474 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004475}
4476
aliguorif141eaf2009-04-07 18:43:24 +00004477static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4478 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4479 BlockDriverCompletionFunc *cb, void *opaque)
4480{
4481 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4482}
4483
Kevin Wolf68485422011-06-30 10:05:46 +02004484
4485typedef struct BlockDriverAIOCBCoroutine {
4486 BlockDriverAIOCB common;
4487 BlockRequest req;
4488 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004489 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004490 QEMUBH* bh;
4491} BlockDriverAIOCBCoroutine;
4492
4493static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4494{
Kevin Wolfd318aea2012-11-13 16:35:08 +01004495 BlockDriverAIOCBCoroutine *acb =
4496 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4497 bool done = false;
4498
4499 acb->done = &done;
4500 while (!done) {
4501 qemu_aio_wait();
4502 }
Kevin Wolf68485422011-06-30 10:05:46 +02004503}
4504
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004505static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004506 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4507 .cancel = bdrv_aio_co_cancel_em,
4508};
4509
Paolo Bonzini35246a62011-10-14 10:41:29 +02004510static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004511{
4512 BlockDriverAIOCBCoroutine *acb = opaque;
4513
4514 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004515
4516 if (acb->done) {
4517 *acb->done = true;
4518 }
4519
Kevin Wolf68485422011-06-30 10:05:46 +02004520 qemu_bh_delete(acb->bh);
4521 qemu_aio_release(acb);
4522}
4523
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004524/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4525static void coroutine_fn bdrv_co_do_rw(void *opaque)
4526{
4527 BlockDriverAIOCBCoroutine *acb = opaque;
4528 BlockDriverState *bs = acb->common.bs;
4529
4530 if (!acb->is_write) {
4531 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004532 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004533 } else {
4534 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004535 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004536 }
4537
Paolo Bonzini35246a62011-10-14 10:41:29 +02004538 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004539 qemu_bh_schedule(acb->bh);
4540}
4541
Kevin Wolf68485422011-06-30 10:05:46 +02004542static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4543 int64_t sector_num,
4544 QEMUIOVector *qiov,
4545 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004546 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004547 BlockDriverCompletionFunc *cb,
4548 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004549 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004550{
4551 Coroutine *co;
4552 BlockDriverAIOCBCoroutine *acb;
4553
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004554 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004555 acb->req.sector = sector_num;
4556 acb->req.nb_sectors = nb_sectors;
4557 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004558 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004559 acb->is_write = is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004560 acb->done = NULL;
Kevin Wolf68485422011-06-30 10:05:46 +02004561
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004562 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004563 qemu_coroutine_enter(co, acb);
4564
4565 return &acb->common;
4566}
4567
Paolo Bonzini07f07612011-10-17 12:32:12 +02004568static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004569{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004570 BlockDriverAIOCBCoroutine *acb = opaque;
4571 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004572
Paolo Bonzini07f07612011-10-17 12:32:12 +02004573 acb->req.error = bdrv_co_flush(bs);
4574 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004575 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004576}
4577
Paolo Bonzini07f07612011-10-17 12:32:12 +02004578BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004579 BlockDriverCompletionFunc *cb, void *opaque)
4580{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004581 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004582
Paolo Bonzini07f07612011-10-17 12:32:12 +02004583 Coroutine *co;
4584 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004585
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004586 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004587 acb->done = NULL;
4588
Paolo Bonzini07f07612011-10-17 12:32:12 +02004589 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4590 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004591
Alexander Graf016f5cf2010-05-26 17:51:49 +02004592 return &acb->common;
4593}
4594
Paolo Bonzini4265d622011-10-17 12:32:14 +02004595static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4596{
4597 BlockDriverAIOCBCoroutine *acb = opaque;
4598 BlockDriverState *bs = acb->common.bs;
4599
4600 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4601 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4602 qemu_bh_schedule(acb->bh);
4603}
4604
4605BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4606 int64_t sector_num, int nb_sectors,
4607 BlockDriverCompletionFunc *cb, void *opaque)
4608{
4609 Coroutine *co;
4610 BlockDriverAIOCBCoroutine *acb;
4611
4612 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4613
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004614 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004615 acb->req.sector = sector_num;
4616 acb->req.nb_sectors = nb_sectors;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004617 acb->done = NULL;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004618 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4619 qemu_coroutine_enter(co, acb);
4620
4621 return &acb->common;
4622}
4623
bellardea2384d2004-08-01 21:59:26 +00004624void bdrv_init(void)
4625{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004626 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004627}
pbrookce1a14d2006-08-07 02:38:06 +00004628
Markus Armbrustereb852012009-10-27 18:41:44 +01004629void bdrv_init_with_whitelist(void)
4630{
4631 use_bdrv_whitelist = 1;
4632 bdrv_init();
4633}
4634
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004635void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004636 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004637{
pbrookce1a14d2006-08-07 02:38:06 +00004638 BlockDriverAIOCB *acb;
4639
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004640 acb = g_slice_alloc(aiocb_info->aiocb_size);
4641 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004642 acb->bs = bs;
4643 acb->cb = cb;
4644 acb->opaque = opaque;
4645 return acb;
4646}
4647
4648void qemu_aio_release(void *p)
4649{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004650 BlockDriverAIOCB *acb = p;
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004651 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
pbrookce1a14d2006-08-07 02:38:06 +00004652}
bellard19cb3732006-08-19 11:45:59 +00004653
4654/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004655/* Coroutine block device emulation */
4656
4657typedef struct CoroutineIOCompletion {
4658 Coroutine *coroutine;
4659 int ret;
4660} CoroutineIOCompletion;
4661
4662static void bdrv_co_io_em_complete(void *opaque, int ret)
4663{
4664 CoroutineIOCompletion *co = opaque;
4665
4666 co->ret = ret;
4667 qemu_coroutine_enter(co->coroutine, NULL);
4668}
4669
4670static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4671 int nb_sectors, QEMUIOVector *iov,
4672 bool is_write)
4673{
4674 CoroutineIOCompletion co = {
4675 .coroutine = qemu_coroutine_self(),
4676 };
4677 BlockDriverAIOCB *acb;
4678
4679 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004680 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4681 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004682 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004683 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4684 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004685 }
4686
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004687 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004688 if (!acb) {
4689 return -EIO;
4690 }
4691 qemu_coroutine_yield();
4692
4693 return co.ret;
4694}
4695
4696static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4697 int64_t sector_num, int nb_sectors,
4698 QEMUIOVector *iov)
4699{
4700 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4701}
4702
4703static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4704 int64_t sector_num, int nb_sectors,
4705 QEMUIOVector *iov)
4706{
4707 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4708}
4709
Paolo Bonzini07f07612011-10-17 12:32:12 +02004710static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004711{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004712 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004713
Paolo Bonzini07f07612011-10-17 12:32:12 +02004714 rwco->ret = bdrv_co_flush(rwco->bs);
4715}
4716
4717int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4718{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004719 int ret;
4720
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004721 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004722 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004723 }
4724
Kevin Wolfca716362011-11-10 18:13:59 +01004725 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004726 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004727 if (bs->drv->bdrv_co_flush_to_os) {
4728 ret = bs->drv->bdrv_co_flush_to_os(bs);
4729 if (ret < 0) {
4730 return ret;
4731 }
4732 }
4733
Kevin Wolfca716362011-11-10 18:13:59 +01004734 /* But don't actually force it to the disk with cache=unsafe */
4735 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004736 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004737 }
4738
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004739 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004740 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004741 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004742 } else if (bs->drv->bdrv_aio_flush) {
4743 BlockDriverAIOCB *acb;
4744 CoroutineIOCompletion co = {
4745 .coroutine = qemu_coroutine_self(),
4746 };
4747
4748 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4749 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004750 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004751 } else {
4752 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004753 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004754 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02004755 } else {
4756 /*
4757 * Some block drivers always operate in either writethrough or unsafe
4758 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
4759 * know how the server works (because the behaviour is hardcoded or
4760 * depends on server-side configuration), so we can't ensure that
4761 * everything is safe on disk. Returning an error doesn't work because
4762 * that would break guests even if the server operates in writethrough
4763 * mode.
4764 *
4765 * Let's hope the user knows what he's doing.
4766 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004767 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004768 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004769 if (ret < 0) {
4770 return ret;
4771 }
4772
4773 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4774 * in the case of cache=unsafe, so there are no useless flushes.
4775 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02004776flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004777 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004778}
4779
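/*
 * Illustrative sketch: the two flush hooks a format driver can provide, as
 * consumed by bdrv_co_flush() above.  The driver name and the empty
 * implementation are hypothetical placeholders.
 *
 *     static int coroutine_fn example_co_flush_to_os(BlockDriverState *bs)
 *     {
 *         // write back format-internal metadata caches here
 *         return 0;
 *     }
 *
 *     static BlockDriver bdrv_example = {
 *         .format_name         = "example",
 *         .bdrv_co_flush_to_os = example_co_flush_to_os,
 *         // .bdrv_co_flush_to_disk (or .bdrv_aio_flush) would flush the
 *         // underlying medium; with neither, bdrv_co_flush() treats the
 *         // flush as a success and only flushes bs->file
 *     };
 */
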
Anthony Liguori0f154232011-11-14 15:09:45 -06004780void bdrv_invalidate_cache(BlockDriverState *bs)
4781{
4782 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
4783 bs->drv->bdrv_invalidate_cache(bs);
4784 }
4785}
4786
4787void bdrv_invalidate_cache_all(void)
4788{
4789 BlockDriverState *bs;
4790
Benoît Canetdc364f42014-01-23 21:31:32 +01004791 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Anthony Liguori0f154232011-11-14 15:09:45 -06004792 bdrv_invalidate_cache(bs);
4793 }
4794}
4795
Benoît Canet07789262012-03-23 08:36:49 +01004796void bdrv_clear_incoming_migration_all(void)
4797{
4798 BlockDriverState *bs;
4799
Benoît Canetdc364f42014-01-23 21:31:32 +01004800 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Benoît Canet07789262012-03-23 08:36:49 +01004801 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4802 }
4803}
4804
Paolo Bonzini07f07612011-10-17 12:32:12 +02004805int bdrv_flush(BlockDriverState *bs)
4806{
4807 Coroutine *co;
4808 RwCo rwco = {
4809 .bs = bs,
4810 .ret = NOT_DONE,
4811 };
4812
4813 if (qemu_in_coroutine()) {
4814 /* Fast-path if already in coroutine context */
4815 bdrv_flush_co_entry(&rwco);
4816 } else {
4817 co = qemu_coroutine_create(bdrv_flush_co_entry);
4818 qemu_coroutine_enter(co, &rwco);
4819 while (rwco.ret == NOT_DONE) {
4820 qemu_aio_wait();
4821 }
4822 }
4823
4824 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004825}
4826
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004827typedef struct DiscardCo {
4828 BlockDriverState *bs;
4829 int64_t sector_num;
4830 int nb_sectors;
4831 int ret;
4832} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004833static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4834{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004835 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004836
4837 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4838}
4839
Peter Lieven6f14da52013-10-24 12:06:59 +02004840/* If no limit is specified in the BlockLimits, use a default
4841 * of 32768 512-byte sectors (16 MiB) per request.
4842 */
4843#define MAX_DISCARD_DEFAULT 32768
4844
Paolo Bonzini4265d622011-10-17 12:32:14 +02004845int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4846 int nb_sectors)
4847{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004848 int max_discard;
4849
Paolo Bonzini4265d622011-10-17 12:32:14 +02004850 if (!bs->drv) {
4851 return -ENOMEDIUM;
4852 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4853 return -EIO;
4854 } else if (bs->read_only) {
4855 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004856 }
4857
Fam Zhenge4654d22013-11-13 18:29:43 +08004858 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004859
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01004860 /* Do nothing if disabled. */
4861 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4862 return 0;
4863 }
4864
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004865 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004866 return 0;
4867 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004868
4869 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4870 while (nb_sectors > 0) {
4871 int ret;
4872 int num = nb_sectors;
4873
4874 /* align request */
4875 if (bs->bl.discard_alignment &&
4876 num >= bs->bl.discard_alignment &&
4877 sector_num % bs->bl.discard_alignment) {
4878 if (num > bs->bl.discard_alignment) {
4879 num = bs->bl.discard_alignment;
4880 }
4881 num -= sector_num % bs->bl.discard_alignment;
4882 }
4883
4884 /* limit request size */
4885 if (num > max_discard) {
4886 num = max_discard;
4887 }
4888
4889 if (bs->drv->bdrv_co_discard) {
4890 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4891 } else {
4892 BlockDriverAIOCB *acb;
4893 CoroutineIOCompletion co = {
4894 .coroutine = qemu_coroutine_self(),
4895 };
4896
4897 acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
4898 bdrv_co_io_em_complete, &co);
4899 if (acb == NULL) {
4900 return -EIO;
4901 } else {
4902 qemu_coroutine_yield();
4903 ret = co.ret;
4904 }
4905 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01004906 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004907 return ret;
4908 }
4909
4910 sector_num += num;
4911 nb_sectors -= num;
4912 }
4913 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004914}
4915
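/*
 * Worked example for the splitting loop above: with discard_alignment = 8,
 * sector_num = 5 and nb_sectors = 20, the first iteration trims num to
 * 8 - (5 % 8) = 3 sectors so that the next chunk starts on an 8-sector
 * boundary; the second iteration then discards the remaining 17 sectors
 * starting at the aligned sector 8 (17 is below any max_discard limit).
 */
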
4916int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4917{
4918 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004919 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004920 .bs = bs,
4921 .sector_num = sector_num,
4922 .nb_sectors = nb_sectors,
4923 .ret = NOT_DONE,
4924 };
4925
4926 if (qemu_in_coroutine()) {
4927 /* Fast-path if already in coroutine context */
4928 bdrv_discard_co_entry(&rwco);
4929 } else {
4930 co = qemu_coroutine_create(bdrv_discard_co_entry);
4931 qemu_coroutine_enter(co, &rwco);
4932 while (rwco.ret == NOT_DONE) {
4933 qemu_aio_wait();
4934 }
4935 }
4936
4937 return rwco.ret;
4938}
4939
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004940/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00004941/* removable device support */
4942
4943/**
4944 * Return TRUE if the media is present
4945 */
4946int bdrv_is_inserted(BlockDriverState *bs)
4947{
4948 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004949
bellard19cb3732006-08-19 11:45:59 +00004950 if (!drv)
4951 return 0;
4952 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004953 return 1;
4954 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004955}
4956
4957/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004958 * Return whether the media changed since the last call to this
4959 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004960 */
4961int bdrv_media_changed(BlockDriverState *bs)
4962{
4963 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004964
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004965 if (drv && drv->bdrv_media_changed) {
4966 return drv->bdrv_media_changed(bs);
4967 }
4968 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00004969}
4970
4971/**
4972 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4973 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02004974void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00004975{
4976 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004977
Markus Armbruster822e1cd2011-07-20 18:23:42 +02004978 if (drv && drv->bdrv_eject) {
4979 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00004980 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02004981
4982 if (bs->device_name[0] != '\0') {
4983 bdrv_emit_qmp_eject_event(bs, eject_flag);
4984 }
bellard19cb3732006-08-19 11:45:59 +00004985}
4986
bellard19cb3732006-08-19 11:45:59 +00004987/**
4988 * Lock or unlock the media (if it is locked, the user won't be able
4989 * to eject it manually).
4990 */
Markus Armbruster025e8492011-09-06 18:58:47 +02004991void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00004992{
4993 BlockDriver *drv = bs->drv;
4994
Markus Armbruster025e8492011-09-06 18:58:47 +02004995 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01004996
Markus Armbruster025e8492011-09-06 18:58:47 +02004997 if (drv && drv->bdrv_lock_medium) {
4998 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00004999 }
5000}
ths985a03b2007-12-24 16:10:43 +00005001
5002/* needed for generic scsi interface */
5003
5004int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5005{
5006 BlockDriver *drv = bs->drv;
5007
5008 if (drv && drv->bdrv_ioctl)
5009 return drv->bdrv_ioctl(bs, req, buf);
5010 return -ENOTSUP;
5011}
aliguori7d780662009-03-12 19:57:08 +00005012
aliguori221f7152009-03-28 17:28:41 +00005013BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5014 unsigned long int req, void *buf,
5015 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005016{
aliguori221f7152009-03-28 17:28:41 +00005017 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005018
aliguori221f7152009-03-28 17:28:41 +00005019 if (drv && drv->bdrv_aio_ioctl)
5020 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5021 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005022}
aliguorie268ca52009-04-22 20:20:00 +00005023
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005024void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005025{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005026 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005027}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005028
aliguorie268ca52009-04-22 20:20:00 +00005029void *qemu_blockalign(BlockDriverState *bs, size_t size)
5030{
Kevin Wolf339064d2013-11-28 10:23:32 +01005031 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005032}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005033
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005034/*
5035 * Check if all memory in this vector is sector aligned.
5036 */
5037bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5038{
5039 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005040 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005041
5042 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005043 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005044 return false;
5045 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005046 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005047 return false;
5048 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005049 }
5050
5051 return true;
5052}
5053
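/*
 * Illustrative sketch: building a vector that passes bdrv_qiov_is_aligned()
 * by allocating the buffer with qemu_blockalign(), which already honours
 * bdrv_opt_mem_align().  The 64 KiB size is an arbitrary example value.
 *
 *     static bool aligned_qiov_example(BlockDriverState *bs)
 *     {
 *         QEMUIOVector qiov;
 *         void *buf = qemu_blockalign(bs, 64 * 1024);
 *         bool ok;
 *
 *         qemu_iovec_init(&qiov, 1);
 *         qemu_iovec_add(&qiov, buf, 64 * 1024);
 *         ok = bdrv_qiov_is_aligned(bs, &qiov);
 *         qemu_iovec_destroy(&qiov);
 *         qemu_vfree(buf);
 *         return ok;
 *     }
 */
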
Fam Zhenge4654d22013-11-13 18:29:43 +08005054BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005055{
5056 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005057 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005058
Paolo Bonzini50717e92013-01-21 17:09:45 +01005059 assert((granularity & (granularity - 1)) == 0);
5060
Fam Zhenge4654d22013-11-13 18:29:43 +08005061 granularity >>= BDRV_SECTOR_BITS;
5062 assert(granularity);
5063 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
5064 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5065 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5066 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5067 return bitmap;
5068}
5069
5070void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5071{
5072 BdrvDirtyBitmap *bm, *next;
5073 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5074 if (bm == bitmap) {
5075 QLIST_REMOVE(bitmap, list);
5076 hbitmap_free(bitmap->bitmap);
5077 g_free(bitmap);
5078 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005079 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005080 }
5081}
5082
Fam Zheng21b56832013-11-13 18:29:44 +08005083BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5084{
5085 BdrvDirtyBitmap *bm;
5086 BlockDirtyInfoList *list = NULL;
5087 BlockDirtyInfoList **plist = &list;
5088
5089 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5090 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5091 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5092 info->count = bdrv_get_dirty_count(bs, bm);
5093 info->granularity =
5094 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5095 entry->value = info;
5096 *plist = entry;
5097 plist = &entry->next;
5098 }
5099
5100 return list;
5101}
5102
Fam Zhenge4654d22013-11-13 18:29:43 +08005103int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005104{
Fam Zhenge4654d22013-11-13 18:29:43 +08005105 if (bitmap) {
5106 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005107 } else {
5108 return 0;
5109 }
5110}
5111
Fam Zhenge4654d22013-11-13 18:29:43 +08005112void bdrv_dirty_iter_init(BlockDriverState *bs,
5113 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005114{
Fam Zhenge4654d22013-11-13 18:29:43 +08005115 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005116}
5117
5118void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5119 int nr_sectors)
5120{
Fam Zhenge4654d22013-11-13 18:29:43 +08005121 BdrvDirtyBitmap *bitmap;
5122 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5123 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005124 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005125}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005126
Fam Zhenge4654d22013-11-13 18:29:43 +08005127void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5128{
5129 BdrvDirtyBitmap *bitmap;
5130 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5131 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5132 }
5133}
5134
5135int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5136{
5137 return hbitmap_count(bitmap->bitmap);
5138}
5139
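/*
 * Illustrative sketch: creating a dirty bitmap, scanning it with an
 * HBitmapIter and releasing it again.  The 64 KiB granularity is an
 * arbitrary example value.
 *
 *     static void dirty_bitmap_example(BlockDriverState *bs)
 *     {
 *         BdrvDirtyBitmap *bitmap = bdrv_create_dirty_bitmap(bs, 65536);
 *         HBitmapIter hbi;
 *         int64_t sector;
 *
 *         // ... writes mark sectors dirty via bdrv_set_dirty() ...
 *
 *         bdrv_dirty_iter_init(bs, bitmap, &hbi);
 *         while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
 *             // sector is the next dirty sector, at bitmap granularity
 *         }
 *
 *         bdrv_release_dirty_bitmap(bs, bitmap);
 *     }
 */
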
Fam Zheng9fcb0252013-08-23 09:14:46 +08005140/* Get a reference to bs */
5141void bdrv_ref(BlockDriverState *bs)
5142{
5143 bs->refcnt++;
5144}
5145
5146/* Release a previously grabbed reference to bs.
5147 * If the reference count drops to zero after releasing, the BlockDriverState
5148 * is deleted. */
5149void bdrv_unref(BlockDriverState *bs)
5150{
5151 assert(bs->refcnt > 0);
5152 if (--bs->refcnt == 0) {
5153 bdrv_delete(bs);
5154 }
5155}
5156
Marcelo Tosattidb593f22011-01-26 12:12:34 -02005157void bdrv_set_in_use(BlockDriverState *bs, int in_use)
5158{
5159 assert(bs->in_use != in_use);
5160 bs->in_use = in_use;
5161}
5162
5163int bdrv_in_use(BlockDriverState *bs)
5164{
5165 return bs->in_use;
5166}
5167
Luiz Capitulino28a72822011-09-26 17:43:50 -03005168void bdrv_iostatus_enable(BlockDriverState *bs)
5169{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005170 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005171 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005172}
5173
5174/* The I/O status is only enabled if the drive explicitly
5175 * enables it _and_ the VM is configured to stop on errors */
5176bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5177{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005178 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005179 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5180 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5181 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005182}
5183
5184void bdrv_iostatus_disable(BlockDriverState *bs)
5185{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005186 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005187}
5188
5189void bdrv_iostatus_reset(BlockDriverState *bs)
5190{
5191 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005192 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005193 if (bs->job) {
5194 block_job_iostatus_reset(bs->job);
5195 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005196 }
5197}
5198
Luiz Capitulino28a72822011-09-26 17:43:50 -03005199void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5200{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005201 assert(bdrv_iostatus_is_enabled(bs));
5202 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005203 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5204 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005205 }
5206}
5207
Christoph Hellwiga597e792011-08-25 08:26:01 +02005208void
5209bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5210 enum BlockAcctType type)
5211{
5212 assert(type < BDRV_MAX_IOTYPE);
5213
5214 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005215 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02005216 cookie->type = type;
5217}
5218
5219void
5220bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5221{
5222 assert(cookie->type < BDRV_MAX_IOTYPE);
5223
5224 bs->nr_bytes[cookie->type] += cookie->bytes;
5225 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005226 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02005227}
5228
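/*
 * Illustrative sketch: how a device model brackets a guest read with the
 * accounting helpers above.  The 4096-byte transfer is a hypothetical value.
 *
 *     static void acct_example(BlockDriverState *bs)
 *     {
 *         BlockAcctCookie cookie;
 *
 *         bdrv_acct_start(bs, &cookie, 4096, BDRV_ACCT_READ);
 *         // ... issue and complete the read ...
 *         bdrv_acct_done(bs, &cookie);
 *     }
 */
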
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005229void bdrv_img_create(const char *filename, const char *fmt,
5230 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005231 char *options, uint64_t img_size, int flags,
5232 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005233{
5234 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02005235 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005236 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005237 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005238 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005239 int ret = 0;
5240
5241 /* Find driver and parse its options */
5242 drv = bdrv_find_format(fmt);
5243 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005244 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005245 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005246 }
5247
Kevin Wolf98289622013-07-10 15:47:39 +02005248 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005249 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005250 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005251 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005252 }
5253
5254 create_options = append_option_parameters(create_options,
5255 drv->create_options);
5256 create_options = append_option_parameters(create_options,
5257 proto_drv->create_options);
5258
5259 /* Create parameter list with default values */
5260 param = parse_option_parameters("", create_options, param);
5261
5262 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
5263
5264 /* Parse -o options */
5265 if (options) {
5266 param = parse_option_parameters(options, create_options, param);
5267 if (param == NULL) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005268 error_setg(errp, "Invalid options for file format '%s'.", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005269 goto out;
5270 }
5271 }
5272
5273 if (base_filename) {
5274 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
5275 base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005276 error_setg(errp, "Backing file not supported for file format '%s'",
5277 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005278 goto out;
5279 }
5280 }
5281
5282 if (base_fmt) {
5283 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005284 error_setg(errp, "Backing file format not supported for file "
5285 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005286 goto out;
5287 }
5288 }
5289
Jes Sorensen792da932010-12-16 13:52:17 +01005290 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
5291 if (backing_file && backing_file->value.s) {
5292 if (!strcmp(filename, backing_file->value.s)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005293 error_setg(errp, "Error: Trying to create an image with the "
5294 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005295 goto out;
5296 }
5297 }
5298
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005299 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
5300 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005301 backing_drv = bdrv_find_format(backing_fmt->value.s);
5302 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005303 error_setg(errp, "Unknown backing file format '%s'",
5304 backing_fmt->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005305 goto out;
5306 }
5307 }
5308
5309 // The size for the image must always be specified, with one exception:
5310 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02005311 size = get_option_parameter(param, BLOCK_OPT_SIZE);
5312 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005313 if (backing_file && backing_file->value.s) {
Max Reitz66f6b812013-12-03 14:57:52 +01005314 BlockDriverState *bs;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005315 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005316 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02005317 int back_flags;
5318
5319 /* backing files always opened read-only */
5320 back_flags =
5321 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005322
Max Reitzf67503e2014-02-18 18:33:05 +01005323 bs = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01005324 ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005325 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005326 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005327 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5328 backing_file->value.s,
5329 error_get_pretty(local_err));
5330 error_free(local_err);
5331 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005332 goto out;
5333 }
5334 bdrv_get_geometry(bs, &size);
5335 size *= 512;
5336
5337 snprintf(buf, sizeof(buf), "%" PRId64, size);
5338 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
Max Reitz66f6b812013-12-03 14:57:52 +01005339
5340 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005341 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005342 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005343 goto out;
5344 }
5345 }
5346
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005347 if (!quiet) {
5348 printf("Formatting '%s', fmt=%s ", filename, fmt);
5349 print_option_parameters(param);
5350 puts("");
5351 }
Max Reitzcc84d902013-09-06 17:14:26 +02005352 ret = bdrv_create(drv, filename, param, &local_err);
5353 if (ret == -EFBIG) {
5354 /* This is generally a better message than whatever the driver would
5355 * deliver (especially because of the cluster_size_hint), since that
5356 * is most probably not much different from "image too large". */
5357 const char *cluster_size_hint = "";
5358 if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5359 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005360 }
Max Reitzcc84d902013-09-06 17:14:26 +02005361 error_setg(errp, "The image size is too large for file format '%s'"
5362 "%s", fmt, cluster_size_hint);
5363 error_free(local_err);
5364 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005365 }
5366
5367out:
5368 free_option_parameters(create_options);
5369 free_option_parameters(param);
5370
Markus Armbruster84d18f02014-01-30 15:07:28 +01005371 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005372 error_propagate(errp, local_err);
5373 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005374}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005375
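/*
 * Illustrative sketch: creating a 1 GiB qcow2 image through
 * bdrv_img_create().  The file name, format and flags are example values.
 *
 *     static void img_create_example(void)
 *     {
 *         Error *local_err = NULL;
 *
 *         bdrv_img_create("test.qcow2", "qcow2", NULL, NULL, NULL,
 *                         (uint64_t)1 << 30, 0, &local_err, true);
 *         if (local_err) {
 *             fprintf(stderr, "%s\n", error_get_pretty(local_err));
 *             error_free(local_err);
 *         }
 *     }
 */
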
5376AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5377{
5378 /* Currently BlockDriverState always uses the main loop AioContext */
5379 return qemu_get_aio_context();
5380}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005381
5382void bdrv_add_before_write_notifier(BlockDriverState *bs,
5383 NotifierWithReturn *notifier)
5384{
5385 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5386}
Max Reitz6f176b42013-09-03 10:09:50 +02005387
5388int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
5389{
5390 if (bs->drv->bdrv_amend_options == NULL) {
5391 return -ENOTSUP;
5392 }
5393 return bs->drv->bdrv_amend_options(bs, options);
5394}
Benoît Canetf6186f42013-10-02 14:33:48 +02005395
Benoît Canet212a5a82014-01-23 21:31:36 +01005396/* Used to recurse on single child block filters.
5397 * Single child block filters store their child in bs->file.
5398 */
5399bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
5400 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005401{
Benoît Canet212a5a82014-01-23 21:31:36 +01005402 if (!bs->drv) {
5403 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005404 }
5405
Benoît Canet212a5a82014-01-23 21:31:36 +01005406 if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
5407 if (bs == candidate) {
5408 return true;
5409 } else {
5410 return false;
5411 }
Benoît Canetf6186f42013-10-02 14:33:48 +02005412 }
5413
Benoît Canet212a5a82014-01-23 21:31:36 +01005414 if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {
5415 return false;
5416 }
5417
5418 if (!bs->file) {
5419 return false;
5420 }
5421
5422 return bdrv_recurse_is_first_non_filter(bs->file, candidate);
Benoît Canetf6186f42013-10-02 14:33:48 +02005423}
5424
Benoît Canet212a5a82014-01-23 21:31:36 +01005425bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5426 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005427{
Benoît Canet212a5a82014-01-23 21:31:36 +01005428 if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
5429 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5430 }
5431
5432 return bdrv_generic_is_first_non_filter(bs, candidate);
5433}
5434
5435/* This function checks if the candidate is the first non-filter bs down its
5436 * bs chain. Since we don't have pointers to parents, it explores all bs chains
5437 * from the top. Some filters can choose not to pass down the recursion.
5438 */
5439bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5440{
5441 BlockDriverState *bs;
5442
5443 /* walk down the bs forest recursively */
5444 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5445 bool perm;
5446
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005447 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005448
5449 /* candidate is the first non filter */
5450 if (perm) {
5451 return true;
5452 }
5453 }
5454
5455 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005456}