/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "sysemu/blockdev.h" /* FIXME layering violation */
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

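/* Helpers for recognizing Windows drive specifications such as "c:" and
 * device paths like "\\.\..." or "//./...". */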
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an I/O request wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue this I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

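/* Return the memory alignment that is optimal for I/O buffers used with this
 * BlockDriverState; falls back to 4096 bytes when no driver is attached. */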
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

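/* Build the full path of the backing file: copied verbatim if it is empty or
 * already carries a protocol prefix, otherwise combined with bs->filename. */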
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

338/* create a new block device (by default it is empty) */
Markus Armbrustere4e99862014-10-07 13:59:03 +0200339BlockDriverState *bdrv_new_root(const char *device_name, Error **errp)
bellardfc01f7e2003-06-30 10:03:06 +0000340{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100341 BlockDriverState *bs;
Markus Armbrustere4e99862014-10-07 13:59:03 +0200342
343 assert(*device_name);
bellardb3380822004-03-14 21:38:54 +0000344
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200345 if (*device_name && !id_wellformed(device_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200346 error_setg(errp, "Invalid device name");
347 return NULL;
348 }
349
Kevin Wolff2d953e2014-04-17 13:27:05 +0200350 if (bdrv_find(device_name)) {
351 error_setg(errp, "Device with id '%s' already exists",
352 device_name);
353 return NULL;
354 }
355 if (bdrv_find_node(device_name)) {
Markus Armbrusterd2244692014-09-12 21:26:24 +0200356 error_setg(errp,
357 "Device name '%s' conflicts with an existing node name",
Kevin Wolff2d953e2014-04-17 13:27:05 +0200358 device_name);
359 return NULL;
360 }
361
Markus Armbrustere4e99862014-10-07 13:59:03 +0200362 bs = bdrv_new();
363
364 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
365 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
366
367 return bs;
368}
369
370BlockDriverState *bdrv_new(void)
371{
372 BlockDriverState *bs;
373 int i;
374
Markus Armbruster5839e532014-08-19 10:31:08 +0200375 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800376 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800377 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
378 QLIST_INIT(&bs->op_blockers[i]);
379 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300380 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200381 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200382 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200383 qemu_co_queue_init(&bs->throttled_reqs[0]);
384 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800385 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200386 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200387
bellardb3380822004-03-14 21:38:54 +0000388 return bs;
389}
390
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200391void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
392{
393 notifier_list_add(&bs->close_notifiers, notify);
394}
395
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

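/* Run the driver's bdrv_create callback in a coroutine; when called outside
 * coroutine context, poll the main AioContext until the creation completes. */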
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

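/* Probe the image format: read the first 2048 bytes and let every registered
 * driver's bdrv_probe() score them; the highest-scoring driver wins. */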
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

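/* Compute the flags that are actually passed to the driver's open function:
 * strip flags handled by the generic block layer and force temporary
 * snapshots to be writable. */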
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called directly with a protocol driver as drv. This
     * layer is already opened, so assign it to bs (while file becomes a
     * closed BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

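/* Parse a "json:{...}" pseudo-protocol filename into a flattened options
 * QDict; returns NULL and sets errp if the JSON is invalid or not an object. */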
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

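/* Install (or, with NULL, detach) backing_hd as the backing file of bs,
 * updating the op blockers and the cached backing file name and format. */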
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bs->device_name);
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    qdict_del(options, bdref_key);
    return ret;
}

Chen Gang6b8aeca2014-06-23 23:28:23 +08001315int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001316{
1317 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001318 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001319 int64_t total_size;
1320 BlockDriver *bdrv_qcow2;
Chunyan Liu83d05212014-06-05 17:20:51 +08001321 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001322 QDict *snapshot_options;
1323 BlockDriverState *bs_snapshot;
1324 Error *local_err = NULL;
1325 int ret;
1326
1327 /* if snapshot, we create a temporary backing file and open it
1328 instead of opening 'filename' directly */
1329
1330 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001331 total_size = bdrv_getlength(bs);
1332 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001333 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001334 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001335 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001336 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001337
1338 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001339 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001340 if (ret < 0) {
1341 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001342 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001343 }
1344
1345 bdrv_qcow2 = bdrv_find_format("qcow2");
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001346 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1347 &error_abort);
Chunyan Liu83d05212014-06-05 17:20:51 +08001348 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001349 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001350 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001351 if (ret < 0) {
1352 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1353 "'%s': %s", tmp_filename,
1354 error_get_pretty(local_err));
1355 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001356 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001357 }
1358
1359 /* Prepare a new options QDict for the temporary file */
1360 snapshot_options = qdict_new();
1361 qdict_put(snapshot_options, "file.driver",
1362 qstring_from_str("file"));
1363 qdict_put(snapshot_options, "file.filename",
1364 qstring_from_str(tmp_filename));
1365
Markus Armbrustere4e99862014-10-07 13:59:03 +02001366 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001367
1368 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001369 flags, bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001370 if (ret < 0) {
1371 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001372 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001373 }
1374
1375 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001376
1377out:
1378 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001379 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001380}
1381
Max Reitzda557aa2013-12-20 19:28:11 +01001382/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001383 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001384 *
1385 * options is a QDict of options to pass to the block drivers, or NULL for an
1386 * empty set of options. The reference to the QDict belongs to the block layer
1387 * after the call (even on failure), so if the caller intends to reuse the
1388 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001389 *
1390 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1391 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001392 *
1393 * The reference parameter may be used to specify an existing block device which
1394 * should be opened. If specified, neither options nor a filename may be given,
1395 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001396 */
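/*
 * Illustrative sketch (added for clarity, not part of the original source;
 * the file name is made up): a minimal caller that opens a qcow2 image
 * read/write.  Since the QDict reference belongs to the block layer after the
 * call, opts is not QDECREF'd here.
 *
 *     BlockDriverState *bs = NULL;
 *     QDict *opts = qdict_new();
 *     Error *local_err = NULL;
 *
 *     qdict_put(opts, "driver", qstring_from_str("qcow2"));
 *     if (bdrv_open(&bs, "/tmp/disk.qcow2", NULL, opts,
 *                   BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL, &local_err) < 0) {
 *         error_free(local_err);
 *     } else {
 *         bdrv_unref(bs);
 *     }
 */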
Max Reitzddf56362014-02-18 18:33:06 +01001397int bdrv_open(BlockDriverState **pbs, const char *filename,
1398 const char *reference, QDict *options, int flags,
1399 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001400{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001401 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001402 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001403 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001404 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001405 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001406
Max Reitzf67503e2014-02-18 18:33:05 +01001407 assert(pbs);
1408
Max Reitzddf56362014-02-18 18:33:06 +01001409 if (reference) {
1410 bool options_non_empty = options ? qdict_size(options) : false;
1411 QDECREF(options);
1412
1413 if (*pbs) {
1414 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1415 "another block device");
1416 return -EINVAL;
1417 }
1418
1419 if (filename || options_non_empty) {
1420 error_setg(errp, "Cannot reference an existing block device with "
1421 "additional options or a new filename");
1422 return -EINVAL;
1423 }
1424
1425 bs = bdrv_lookup_bs(reference, reference, errp);
1426 if (!bs) {
1427 return -ENODEV;
1428 }
1429 bdrv_ref(bs);
1430 *pbs = bs;
1431 return 0;
1432 }
1433
Max Reitzf67503e2014-02-18 18:33:05 +01001434 if (*pbs) {
1435 bs = *pbs;
1436 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001437 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001438 }
1439
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001440 /* NULL means an empty set of options */
1441 if (options == NULL) {
1442 options = qdict_new();
1443 }
1444
Kevin Wolf17b005f2014-05-27 10:50:29 +02001445 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001446 if (local_err) {
1447 goto fail;
1448 }
1449
Kevin Wolf76c591b2014-06-04 14:19:44 +02001450 /* Find the right image format driver */
1451 drv = NULL;
1452 drvname = qdict_get_try_str(options, "driver");
1453 if (drvname) {
1454 drv = bdrv_find_format(drvname);
1455 qdict_del(options, "driver");
1456 if (!drv) {
1457 error_setg(errp, "Unknown driver: '%s'", drvname);
1458 ret = -EINVAL;
1459 goto fail;
1460 }
1461 }
1462
1463 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1464 if (drv && !drv->bdrv_file_open) {
1465 /* If the user explicitly wants a format driver here, we'll need to add
1466 * another layer for the protocol in bs->file */
1467 flags &= ~BDRV_O_PROTOCOL;
1468 }
1469
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001470 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001471 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001472
Kevin Wolff4788ad2014-06-03 16:44:19 +02001473 /* Open image file without format layer */
1474 if ((flags & BDRV_O_PROTOCOL) == 0) {
1475 if (flags & BDRV_O_RDWR) {
1476 flags |= BDRV_O_ALLOW_RDWR;
1477 }
1478 if (flags & BDRV_O_SNAPSHOT) {
1479 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1480 flags = bdrv_backing_flags(flags);
1481 }
1482
1483 assert(file == NULL);
1484 ret = bdrv_open_image(&file, filename, options, "file",
1485 bdrv_inherited_flags(flags),
1486 true, &local_err);
1487 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001488 goto fail;
1489 }
1490 }
1491
Kevin Wolf76c591b2014-06-04 14:19:44 +02001492 /* Image format probing */
1493 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001494 ret = find_image_format(file, filename, &drv, &local_err);
1495 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001496 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001497 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001498 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001499 error_setg(errp, "Must specify either driver or file");
1500 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001501 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001502 }
1503
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001504 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001505 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001506 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001507 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001508 }
1509
Max Reitz2a05cbe2013-12-20 19:28:10 +01001510 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001511 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001512 file = NULL;
1513 }
1514
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001515 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001516 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001517 QDict *backing_options;
1518
Benoît Canet5726d872013-09-25 13:30:01 +02001519 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001520 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001521 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001522 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001523 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001524 }
1525
Max Reitz91af7012014-07-18 20:24:56 +02001526 bdrv_refresh_filename(bs);
1527
Kevin Wolfb9988752014-04-03 12:09:34 +02001528 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1529 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001530 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001531 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001532 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001533 goto close_and_fail;
1534 }
1535 }
1536
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001537 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001538 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001539 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001540 if (flags & BDRV_O_PROTOCOL) {
1541 error_setg(errp, "Block protocol '%s' doesn't support the option "
1542 "'%s'", drv->format_name, entry->key);
1543 } else {
1544 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1545 "support the option '%s'", drv->format_name,
1546 bs->device_name, entry->key);
1547 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001548
1549 ret = -EINVAL;
1550 goto close_and_fail;
1551 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001552
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001553 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001554 bdrv_dev_change_media_cb(bs, true);
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001555 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1556 && !runstate_check(RUN_STATE_INMIGRATE)
1557 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1558 error_setg(errp,
1559 "Guest must be stopped for opening of encrypted image");
1560 ret = -EBUSY;
1561 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001562 }
1563
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001564 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001565 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001566 return 0;
1567
Kevin Wolf8bfea152014-04-11 19:16:36 +02001568fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001569 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001570 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001571 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001572 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001573 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001574 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001575 if (!*pbs) {
1576 /* If *pbs is NULL, a new BDS has been created in this function and
1577 needs to be freed now. Otherwise, it does not need to be closed,
1578 since it has not really been opened yet. */
1579 bdrv_unref(bs);
1580 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001581 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001582 error_propagate(errp, local_err);
1583 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001584 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001585
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001586close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001587 /* See fail path, but now the BDS always has to be closed */
1588 if (*pbs) {
1589 bdrv_close(bs);
1590 } else {
1591 bdrv_unref(bs);
1592 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001593 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001594 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001595 error_propagate(errp, local_err);
1596 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001597 return ret;
1598}
1599
Jeff Codye971aa12012-09-20 15:13:19 -04001600typedef struct BlockReopenQueueEntry {
1601 bool prepared;
1602 BDRVReopenState state;
1603 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1604} BlockReopenQueueEntry;
1605
1606/*
1607 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1608 * reopen of multiple devices.
1609 *
1610 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
1611 * already performed, or it may be NULL, in which case a new BlockReopenQueue
1612 * will be created and initialized. This newly created BlockReopenQueue should be
1613 * passed back in for subsequent calls that are intended to be of the same
1614 * atomic 'set'.
1615 *
1616 * bs is the BlockDriverState to add to the reopen queue.
1617 *
1618 * flags contains the open flags for the associated bs
1619 *
1620 * returns a pointer to bs_queue, which is either the newly allocated
1621 * bs_queue, or the existing bs_queue being used.
1622 *
1623 */
1624BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1625 BlockDriverState *bs, int flags)
1626{
1627 assert(bs != NULL);
1628
1629 BlockReopenQueueEntry *bs_entry;
1630 if (bs_queue == NULL) {
1631 bs_queue = g_new0(BlockReopenQueue, 1);
1632 QSIMPLEQ_INIT(bs_queue);
1633 }
1634
Kevin Wolff1f25a22014-04-25 19:04:55 +02001635 /* bdrv_open() masks this flag out */
1636 flags &= ~BDRV_O_PROTOCOL;
1637
Jeff Codye971aa12012-09-20 15:13:19 -04001638 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001639 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001640 }
1641
1642 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1643 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1644
1645 bs_entry->state.bs = bs;
1646 bs_entry->state.flags = flags;
1647
1648 return bs_queue;
1649}
1650
1651/*
1652 * Reopen multiple BlockDriverStates atomically & transactionally.
1653 *
1654 * The queue passed in (bs_queue) must have been built up previously
1655 * via bdrv_reopen_queue().
1656 *
1657 * Reopens all BDS specified in the queue, with the appropriate
1658 * flags. All devices are prepared for reopen, and failure of any
1659 * device will cause all device changes to be abandoned, and intermediate
1660 * data cleaned up.
1661 *
1662 * If all devices prepare successfully, then the changes are committed
1663 * to all devices.
1664 *
1665 */
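/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * reopening two already-open BDSes read-only in one atomic transaction.
 * bdrv_reopen_multiple() frees the queue itself, on success and on failure.
 *
 *     BlockReopenQueue *queue = NULL;
 *     Error *local_err = NULL;
 *
 *     queue = bdrv_reopen_queue(queue, bs_a, bs_a->open_flags & ~BDRV_O_RDWR);
 *     queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags & ~BDRV_O_RDWR);
 *     if (bdrv_reopen_multiple(queue, &local_err) < 0) {
 *         error_free(local_err);
 *     }
 */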
1666int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1667{
1668 int ret = -1;
1669 BlockReopenQueueEntry *bs_entry, *next;
1670 Error *local_err = NULL;
1671
1672 assert(bs_queue != NULL);
1673
1674 bdrv_drain_all();
1675
1676 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1677 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1678 error_propagate(errp, local_err);
1679 goto cleanup;
1680 }
1681 bs_entry->prepared = true;
1682 }
1683
1684 /* If we reach this point, we have success and just need to apply the
1685 * changes
1686 */
1687 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1688 bdrv_reopen_commit(&bs_entry->state);
1689 }
1690
1691 ret = 0;
1692
1693cleanup:
1694 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1695 if (ret && bs_entry->prepared) {
1696 bdrv_reopen_abort(&bs_entry->state);
1697 }
1698 g_free(bs_entry);
1699 }
1700 g_free(bs_queue);
1701 return ret;
1702}
1703
1704
1705/* Reopen a single BlockDriverState with the specified flags. */
1706int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1707{
1708 int ret = -1;
1709 Error *local_err = NULL;
1710 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1711
1712 ret = bdrv_reopen_multiple(queue, &local_err);
1713 if (local_err != NULL) {
1714 error_propagate(errp, local_err);
1715 }
1716 return ret;
1717}
1718
1719
1720/*
1721 * Prepares a BlockDriverState for reopen. All changes are staged in the
1722 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1723 * the block driver layer's .bdrv_reopen_prepare() callback.
1724 *
1725 * bs is the BlockDriverState to reopen
1726 * flags are the new open flags
1727 * queue is the reopen queue
1728 *
1729 * Returns 0 on success, non-zero on error. On error errp will be set
1730 * as well.
1731 *
1732 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1733 * It is the responsibility of the caller to then call bdrv_reopen_abort() or
1734 * bdrv_reopen_commit() for any other BDS that have been left in a prepare() state.
1735 *
1736 */
1737int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1738 Error **errp)
1739{
1740 int ret = -1;
1741 Error *local_err = NULL;
1742 BlockDriver *drv;
1743
1744 assert(reopen_state != NULL);
1745 assert(reopen_state->bs->drv != NULL);
1746 drv = reopen_state->bs->drv;
1747
1748 /* if we are to stay read-only, do not allow permission change
1749 * to r/w */
1750 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1751 reopen_state->flags & BDRV_O_RDWR) {
1752 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1753 reopen_state->bs->device_name);
1754 goto error;
1755 }
1756
1757
1758 ret = bdrv_flush(reopen_state->bs);
1759 if (ret) {
1760 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1761 strerror(-ret));
1762 goto error;
1763 }
1764
1765 if (drv->bdrv_reopen_prepare) {
1766 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1767 if (ret) {
1768 if (local_err != NULL) {
1769 error_propagate(errp, local_err);
1770 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001771 error_setg(errp, "failed while preparing to reopen image '%s'",
1772 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001773 }
1774 goto error;
1775 }
1776 } else {
1777 /* It is currently mandatory to have a bdrv_reopen_prepare()
1778 * handler for each supported drv. */
1779 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1780 drv->format_name, reopen_state->bs->device_name,
1781 "reopening of file");
1782 ret = -1;
1783 goto error;
1784 }
1785
1786 ret = 0;
1787
1788error:
1789 return ret;
1790}
1791
1792/*
1793 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1794 * makes them final by swapping the staging BlockDriverState contents into
1795 * the active BlockDriverState contents.
1796 */
1797void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1798{
1799 BlockDriver *drv;
1800
1801 assert(reopen_state != NULL);
1802 drv = reopen_state->bs->drv;
1803 assert(drv != NULL);
1804
1805 /* If there are any driver level actions to take */
1806 if (drv->bdrv_reopen_commit) {
1807 drv->bdrv_reopen_commit(reopen_state);
1808 }
1809
1810 /* set BDS specific flags now */
1811 reopen_state->bs->open_flags = reopen_state->flags;
1812 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1813 BDRV_O_CACHE_WB);
1814 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001815
Kevin Wolf3baca892014-07-16 17:48:16 +02001816 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001817}
1818
1819/*
1820 * Abort the reopen, and delete and free the staged changes in
1821 * reopen_state
1822 */
1823void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1824{
1825 BlockDriver *drv;
1826
1827 assert(reopen_state != NULL);
1828 drv = reopen_state->bs->drv;
1829 assert(drv != NULL);
1830
1831 if (drv->bdrv_reopen_abort) {
1832 drv->bdrv_reopen_abort(reopen_state);
1833 }
1834}
1835
1836
bellardfc01f7e2003-06-30 10:03:06 +00001837void bdrv_close(BlockDriverState *bs)
1838{
Max Reitz33384422014-06-20 21:57:33 +02001839 BdrvAioNotifier *ban, *ban_next;
1840
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001841 if (bs->job) {
1842 block_job_cancel_sync(bs->job);
1843 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001844 bdrv_drain_all(); /* complete I/O */
1845 bdrv_flush(bs);
1846 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001847 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001848
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001849 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001850 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001851 BlockDriverState *backing_hd = bs->backing_hd;
1852 bdrv_set_backing_hd(bs, NULL);
1853 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001854 }
bellardea2384d2004-08-01 21:59:26 +00001855 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001856 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001857 bs->opaque = NULL;
1858 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001859 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001860 bs->backing_file[0] = '\0';
1861 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001862 bs->total_sectors = 0;
1863 bs->encrypted = 0;
1864 bs->valid_key = 0;
1865 bs->sg = 0;
1866 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001867 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001868 QDECREF(bs->options);
1869 bs->options = NULL;
Max Reitz91af7012014-07-18 20:24:56 +02001870 QDECREF(bs->full_open_options);
1871 bs->full_open_options = NULL;
bellardb3380822004-03-14 21:38:54 +00001872
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001873 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001874 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001875 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001876 }
bellardb3380822004-03-14 21:38:54 +00001877 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001878
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001879 bdrv_dev_change_media_cb(bs, false);
1880
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001881 /*throttling disk I/O limits*/
1882 if (bs->io_limits_enabled) {
1883 bdrv_io_limits_disable(bs);
1884 }
Max Reitz33384422014-06-20 21:57:33 +02001885
1886 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1887 g_free(ban);
1888 }
1889 QLIST_INIT(&bs->aio_notifiers);
bellardb3380822004-03-14 21:38:54 +00001890}
1891
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001892void bdrv_close_all(void)
1893{
1894 BlockDriverState *bs;
1895
Benoît Canetdc364f42014-01-23 21:31:32 +01001896 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001897 AioContext *aio_context = bdrv_get_aio_context(bs);
1898
1899 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001900 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001901 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001902 }
1903}
1904
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001905/* Check if any requests are in-flight (including throttled requests) */
1906static bool bdrv_requests_pending(BlockDriverState *bs)
1907{
1908 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1909 return true;
1910 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001911 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1912 return true;
1913 }
1914 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001915 return true;
1916 }
1917 if (bs->file && bdrv_requests_pending(bs->file)) {
1918 return true;
1919 }
1920 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1921 return true;
1922 }
1923 return false;
1924}
1925
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001926/*
1927 * Wait for pending requests to complete across all BlockDriverStates
1928 *
1929 * This function does not flush data to disk, use bdrv_flush_all() for that
1930 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001931 *
1932 * Note that completion of an asynchronous I/O operation can trigger any
1933 * number of other I/O operations on other devices---for example a coroutine
1934 * can be arbitrarily complex and a constant flow of I/O can come until the
1935 * coroutine is complete. Because of this, it is not possible to have a
1936 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001937 */
1938void bdrv_drain_all(void)
1939{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001940 /* Always run first iteration so any pending completion BHs run */
1941 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001942 BlockDriverState *bs;
1943
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001944 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001945 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001946
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001947 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1948 AioContext *aio_context = bdrv_get_aio_context(bs);
1949 bool bs_busy;
1950
1951 aio_context_acquire(aio_context);
Ming Lei448ad912014-07-04 18:04:33 +08001952 bdrv_flush_io_queue(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001953 bdrv_start_throttled_reqs(bs);
1954 bs_busy = bdrv_requests_pending(bs);
1955 bs_busy |= aio_poll(aio_context, bs_busy);
1956 aio_context_release(aio_context);
1957
1958 busy |= bs_busy;
1959 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001960 }
1961}
1962
Benoît Canetdc364f42014-01-23 21:31:32 +01001963/* make a BlockDriverState anonymous by removing it from the bdrv_states and
1964 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001965 * Also, NULL-terminate the device_name to prevent a double remove */
1966void bdrv_make_anon(BlockDriverState *bs)
1967{
1968 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001969 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001970 }
1971 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001972 if (bs->node_name[0] != '\0') {
1973 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1974 }
1975 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001976}
1977
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001978static void bdrv_rebind(BlockDriverState *bs)
1979{
1980 if (bs->drv && bs->drv->bdrv_rebind) {
1981 bs->drv->bdrv_rebind(bs);
1982 }
1983}
1984
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001985static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1986 BlockDriverState *bs_src)
1987{
1988 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001989
1990 /* dev info */
1991 bs_dest->dev_ops = bs_src->dev_ops;
1992 bs_dest->dev_opaque = bs_src->dev_opaque;
1993 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001994 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001995 bs_dest->copy_on_read = bs_src->copy_on_read;
1996
1997 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1998
Benoît Canetcc0681c2013-09-02 14:14:39 +02001999 /* i/o throttled req */
2000 memcpy(&bs_dest->throttle_state,
2001 &bs_src->throttle_state,
2002 sizeof(ThrottleState));
2003 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2004 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002005 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
2006
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002007 /* r/w error */
2008 bs_dest->on_read_error = bs_src->on_read_error;
2009 bs_dest->on_write_error = bs_src->on_write_error;
2010
2011 /* i/o status */
2012 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2013 bs_dest->iostatus = bs_src->iostatus;
2014
2015 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002016 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002017
Fam Zheng9fcb0252013-08-23 09:14:46 +08002018 /* reference count */
2019 bs_dest->refcnt = bs_src->refcnt;
2020
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002021 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002022 bs_dest->job = bs_src->job;
2023
2024 /* keep the same entry in bdrv_states */
2025 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
2026 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01002027 bs_dest->device_list = bs_src->device_list;
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002028 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2029 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002030}
2031
2032/*
2033 * Swap bs contents for two image chains while they are live,
2034 * while keeping required fields on the BlockDriverState that is
2035 * actually attached to a device.
2036 *
2037 * This will modify the BlockDriverState fields, and swap contents
2038 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2039 *
2040 * bs_new is required to be anonymous.
2041 *
2042 * This function does not create any image files.
2043 */
2044void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2045{
2046 BlockDriverState tmp;
2047
Benoît Canet90ce8a02014-03-05 23:48:29 +01002048 /* The code needs to swap the node_name but simply swapping node_list won't
2049 * work so first remove the nodes from the graph list, do the swap then
2050 * insert them back if needed.
2051 */
2052 if (bs_new->node_name[0] != '\0') {
2053 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2054 }
2055 if (bs_old->node_name[0] != '\0') {
2056 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2057 }
2058
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002059 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2060 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08002061 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002062 assert(bs_new->job == NULL);
2063 assert(bs_new->dev == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002064 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002065 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002066
2067 tmp = *bs_new;
2068 *bs_new = *bs_old;
2069 *bs_old = tmp;
2070
2071 /* there are some fields that should not be swapped, move them back */
2072 bdrv_move_feature_fields(&tmp, bs_old);
2073 bdrv_move_feature_fields(bs_old, bs_new);
2074 bdrv_move_feature_fields(bs_new, &tmp);
2075
2076 /* bs_new shouldn't be in bdrv_states even after the swap! */
2077 assert(bs_new->device_name[0] == '\0');
2078
2079 /* Check a few fields that should remain attached to the device */
2080 assert(bs_new->dev == NULL);
2081 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002082 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002083 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002084
Benoît Canet90ce8a02014-03-05 23:48:29 +01002085 /* insert the nodes back into the graph node list if needed */
2086 if (bs_new->node_name[0] != '\0') {
2087 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2088 }
2089 if (bs_old->node_name[0] != '\0') {
2090 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2091 }
2092
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002093 bdrv_rebind(bs_new);
2094 bdrv_rebind(bs_old);
2095}
2096
Jeff Cody8802d1f2012-02-28 15:54:06 -05002097/*
2098 * Add new bs contents at the top of an image chain while the chain is
2099 * live, while keeping required fields on the top layer.
2100 *
2101 * This will modify the BlockDriverState fields, and swap contents
2102 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2103 *
Jeff Codyf6801b82012-03-27 16:30:19 -04002104 * bs_new is required to be anonymous.
2105 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002106 * This function does not create any image files.
2107 */
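/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * this is how bdrv_append_temp_snapshot() above layers a freshly opened,
 * anonymous overlay on top of an existing chain:
 *
 *     bdrv_append(bs_snapshot, bs);
 *
 * Afterwards bs presents the overlay's contents while keeping its
 * device-related fields, and the old top layer lives on in bs_snapshot as
 * the overlay's backing file.
 */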
2108void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2109{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002110 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002111
2112 /* The contents of 'tmp' will become bs_top, as we are
2113 * swapping bs_new and bs_top contents. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002114 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002115}
2116
Fam Zheng4f6fd342013-08-23 09:14:47 +08002117static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002118{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002119 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02002120 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002121 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002122 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002123 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002124
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002125 bdrv_close(bs);
2126
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002127 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002128 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002129
Markus Armbruster3ae59582014-09-12 21:26:22 +02002130 drive_info_del(drive_get_by_blockdev(bs));
Anthony Liguori7267c092011-08-20 22:09:37 -05002131 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002132}
2133
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002134int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2135/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02002136{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002137 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02002138 return -EBUSY;
2139 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002140 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03002141 bdrv_iostatus_reset(bs);
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002142
2143 /* We're expecting I/O from the device so bump up coroutine pool size */
2144 qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002145 return 0;
2146}
2147
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002148/* TODO qdevified devices don't use this, remove when devices are qdevified */
2149void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02002150{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002151 if (bdrv_attach_dev(bs, dev) < 0) {
2152 abort();
2153 }
2154}
2155
2156void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2157/* TODO change to DeviceState *dev when all users are qdevified */
2158{
2159 assert(bs->dev == dev);
2160 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02002161 bs->dev_ops = NULL;
2162 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002163 bs->guest_block_size = 512;
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002164 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002165}
2166
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002167/* TODO change to return DeviceState * when all users are qdevified */
2168void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02002169{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002170 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02002171}
2172
Markus Armbruster0e49de52011-08-03 15:07:41 +02002173void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2174 void *opaque)
2175{
2176 bs->dev_ops = ops;
2177 bs->dev_opaque = opaque;
2178}
2179
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002180static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002181{
Markus Armbruster145feb12011-08-03 15:07:42 +02002182 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002183 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002184 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002185 if (tray_was_closed) {
2186 /* tray open */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002187 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2188 true, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002189 }
2190 if (load) {
2191 /* tray close */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002192 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2193 false, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002194 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002195 }
2196}
2197
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002198bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2199{
2200 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2201}
2202
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002203void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2204{
2205 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2206 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2207 }
2208}
2209
Markus Armbrustere4def802011-09-06 18:58:53 +02002210bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2211{
2212 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2213 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2214 }
2215 return false;
2216}
2217
Markus Armbruster145feb12011-08-03 15:07:42 +02002218static void bdrv_dev_resize_cb(BlockDriverState *bs)
2219{
2220 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2221 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002222 }
2223}
2224
Markus Armbrusterf1076392011-09-06 18:58:46 +02002225bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2226{
2227 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2228 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2229 }
2230 return false;
2231}
2232
aliguorie97fc192009-04-21 23:11:50 +00002233/*
2234 * Run consistency checks on an image
2235 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002236 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002237 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002238 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002239 */
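/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * checking an image and asking for leaked clusters to be repaired.
 * BDRV_FIX_LEAKS and BDRV_FIX_ERRORS are assumed to be the BdrvCheckMode
 * flags from block.h.
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result, BDRV_FIX_LEAKS);
 *
 *     if (ret < 0) {
 *         ... the check itself could not be completed ...
 *     } else if (result.corruptions || result.leaks) {
 *         ... the image still needs repair ...
 *     }
 */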
Kevin Wolf4534ff52012-05-11 16:07:02 +02002240int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002241{
Max Reitz908bcd52014-08-07 22:47:55 +02002242 if (bs->drv == NULL) {
2243 return -ENOMEDIUM;
2244 }
aliguorie97fc192009-04-21 23:11:50 +00002245 if (bs->drv->bdrv_check == NULL) {
2246 return -ENOTSUP;
2247 }
2248
Kevin Wolfe076f332010-06-29 11:43:13 +02002249 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002250 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002251}
2252
Kevin Wolf8a426612010-07-16 17:17:01 +02002253#define COMMIT_BUF_SECTORS 2048
2254
bellard33e39632003-07-06 17:15:21 +00002255/* commit COW file into the raw image */
2256int bdrv_commit(BlockDriverState *bs)
2257{
bellard19cb3732006-08-19 11:45:59 +00002258 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002259 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002260 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002261 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002262 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002263 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002264
bellard19cb3732006-08-19 11:45:59 +00002265 if (!drv)
2266 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002267
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002268 if (!bs->backing_hd) {
2269 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002270 }
2271
Fam Zheng3718d8a2014-05-23 21:29:43 +08002272 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2273 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002274 return -EBUSY;
2275 }
2276
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002277 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002278 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2279 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002280 open_flags = bs->backing_hd->open_flags;
2281
2282 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002283 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2284 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002285 }
bellard33e39632003-07-06 17:15:21 +00002286 }
bellardea2384d2004-08-01 21:59:26 +00002287
Jeff Cody72706ea2014-01-24 09:02:35 -05002288 length = bdrv_getlength(bs);
2289 if (length < 0) {
2290 ret = length;
2291 goto ro_cleanup;
2292 }
2293
2294 backing_length = bdrv_getlength(bs->backing_hd);
2295 if (backing_length < 0) {
2296 ret = backing_length;
2297 goto ro_cleanup;
2298 }
2299
2300 /* If our top snapshot is larger than the backing file image,
2301 * grow the backing file image if possible. If not possible,
2302 * we must return an error */
2303 if (length > backing_length) {
2304 ret = bdrv_truncate(bs->backing_hd, length);
2305 if (ret < 0) {
2306 goto ro_cleanup;
2307 }
2308 }
2309
2310 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002311
2312 /* qemu_try_blockalign() for bs will choose an alignment that works for
2313 * bs->backing_hd as well, so no need to compare the alignment manually. */
2314 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2315 if (buf == NULL) {
2316 ret = -ENOMEM;
2317 goto ro_cleanup;
2318 }
bellardea2384d2004-08-01 21:59:26 +00002319
Kevin Wolf8a426612010-07-16 17:17:01 +02002320 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002321 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2322 if (ret < 0) {
2323 goto ro_cleanup;
2324 }
2325 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002326 ret = bdrv_read(bs, sector, buf, n);
2327 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002328 goto ro_cleanup;
2329 }
2330
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002331 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2332 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002333 goto ro_cleanup;
2334 }
bellardea2384d2004-08-01 21:59:26 +00002335 }
2336 }
bellard95389c82005-12-18 18:28:15 +00002337
Christoph Hellwig1d449522010-01-17 12:32:30 +01002338 if (drv->bdrv_make_empty) {
2339 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002340 if (ret < 0) {
2341 goto ro_cleanup;
2342 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002343 bdrv_flush(bs);
2344 }
bellard95389c82005-12-18 18:28:15 +00002345
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002346 /*
2347 * Make sure all data we wrote to the backing device is actually
2348 * stable on disk.
2349 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002350 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002351 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002352 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002353
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002354 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002355ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002356 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002357
2358 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002359 /* ignoring error return here */
2360 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002361 }
2362
Christoph Hellwig1d449522010-01-17 12:32:30 +01002363 return ret;
bellard33e39632003-07-06 17:15:21 +00002364}
2365
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002366int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002367{
2368 BlockDriverState *bs;
2369
Benoît Canetdc364f42014-01-23 21:31:32 +01002370 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002371 AioContext *aio_context = bdrv_get_aio_context(bs);
2372
2373 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002374 if (bs->drv && bs->backing_hd) {
2375 int ret = bdrv_commit(bs);
2376 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002377 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002378 return ret;
2379 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002380 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002381 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002382 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002383 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002384}
2385
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002386/**
2387 * Remove an active request from the tracked requests list
2388 *
2389 * This function should be called when a tracked request is completing.
2390 */
2391static void tracked_request_end(BdrvTrackedRequest *req)
2392{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002393 if (req->serialising) {
2394 req->bs->serialising_in_flight--;
2395 }
2396
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002397 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002398 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002399}
2400
2401/**
2402 * Add an active request to the tracked requests list
2403 */
2404static void tracked_request_begin(BdrvTrackedRequest *req,
2405 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002406 int64_t offset,
2407 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002408{
2409 *req = (BdrvTrackedRequest){
2410 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002411 .offset = offset,
2412 .bytes = bytes,
2413 .is_write = is_write,
2414 .co = qemu_coroutine_self(),
2415 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002416 .overlap_offset = offset,
2417 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002418 };
2419
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002420 qemu_co_queue_init(&req->wait_queue);
2421
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002422 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2423}
2424
Kevin Wolfe96126f2014-02-08 10:42:18 +01002425static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002426{
Kevin Wolf73271452013-12-04 17:08:50 +01002427 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002428 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2429 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002430
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002431 if (!req->serialising) {
2432 req->bs->serialising_in_flight++;
2433 req->serialising = true;
2434 }
Kevin Wolf73271452013-12-04 17:08:50 +01002435
2436 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2437 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002438}
2439
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002440/**
2441 * Round a region to cluster boundaries
2442 */
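/*
 * Illustrative example (added for clarity, not part of the original source):
 * with a 64 KB cluster size (128 sectors of 512 bytes), the request
 * [sector 100, sector 300) is widened to whole clusters as follows:
 *
 *     cluster_sector_num = QEMU_ALIGN_DOWN(100, 128)           = 0
 *     cluster_nb_sectors = QEMU_ALIGN_UP(100 - 0 + 200, 128)   = 384
 *
 * i.e. the rounded region is [0, 384), covering clusters 0, 1 and 2.
 */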
Paolo Bonzini343bded2013-01-21 17:09:42 +01002443void bdrv_round_to_clusters(BlockDriverState *bs,
2444 int64_t sector_num, int nb_sectors,
2445 int64_t *cluster_sector_num,
2446 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002447{
2448 BlockDriverInfo bdi;
2449
2450 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2451 *cluster_sector_num = sector_num;
2452 *cluster_nb_sectors = nb_sectors;
2453 } else {
2454 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2455 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2456 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2457 nb_sectors, c);
2458 }
2459}
2460
Kevin Wolf73271452013-12-04 17:08:50 +01002461static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002462{
2463 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002464 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002465
Kevin Wolf73271452013-12-04 17:08:50 +01002466 ret = bdrv_get_info(bs, &bdi);
2467 if (ret < 0 || bdi.cluster_size == 0) {
2468 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002469 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002470 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002471 }
2472}
2473
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002474static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002475 int64_t offset, unsigned int bytes)
2476{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002477 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002478 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002479 return false;
2480 }
2481 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002482 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002483 return false;
2484 }
2485 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002486}
2487
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002488static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002489{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002490 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002491 BdrvTrackedRequest *req;
2492 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002493 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002494
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002495 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002496 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002497 }
2498
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002499 do {
2500 retry = false;
2501 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002502 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002503 continue;
2504 }
Kevin Wolf73271452013-12-04 17:08:50 +01002505 if (tracked_request_overlaps(req, self->overlap_offset,
2506 self->overlap_bytes))
2507 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002508 /* Hitting this means there was a reentrant request, for
2509 * example, a block driver issuing nested requests. This must
2510 * never happen since it means deadlock.
2511 */
2512 assert(qemu_coroutine_self() != req->co);
2513
Kevin Wolf64604402013-12-13 13:04:35 +01002514 /* If the request is already (indirectly) waiting for us, or
2515 * will wait for us as soon as it wakes up, then just go on
2516 * (instead of producing a deadlock in the former case). */
2517 if (!req->waiting_for) {
2518 self->waiting_for = req;
2519 qemu_co_queue_wait(&req->wait_queue);
2520 self->waiting_for = NULL;
2521 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002522 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002523 break;
2524 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002525 }
2526 }
2527 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002528
2529 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002530}
2531
Kevin Wolf756e6732010-01-12 12:55:17 +01002532/*
2533 * Return values:
2534 * 0 - success
2535 * -EINVAL - backing format specified, but no file
2536 * -ENOSPC - can't update the backing file because no space is left in the
2537 * image file header
2538 * -ENOTSUP - format driver doesn't support changing the backing file
2539 */
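/*
 * Illustrative sketch (added for clarity, not part of the original source;
 * the file name is made up): rewriting the backing-file pointer stored in
 * the image header, for example after a rebase-like operation.
 *
 *     int ret = bdrv_change_backing_file(bs, "new-base.qcow2", "qcow2");
 *     if (ret == -ENOTSUP) {
 *         ... the format driver cannot rewrite its header ...
 *     }
 */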
2540int bdrv_change_backing_file(BlockDriverState *bs,
2541 const char *backing_file, const char *backing_fmt)
2542{
2543 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002544 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002545
Paolo Bonzini5f377792012-04-12 14:01:01 +02002546 /* Backing file format doesn't make sense without a backing file */
2547 if (backing_fmt && !backing_file) {
2548 return -EINVAL;
2549 }
2550
Kevin Wolf756e6732010-01-12 12:55:17 +01002551 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002552 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002553 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002554 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002555 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002556
2557 if (ret == 0) {
2558 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2559 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2560 }
2561 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002562}
2563
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002564/*
2565 * Finds the image layer in the chain that has 'bs' as its backing file.
2566 *
2567 * active is the current topmost image.
2568 *
2569 * Returns NULL if bs is not found in active's image chain,
2570 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002571 *
2572 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002573 */
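/*
 * Illustrative example (added for clarity, not part of the original source):
 * for the chain  base <- mid <- top <- active
 *
 *     bdrv_find_overlay(active, mid)    == top
 *     bdrv_find_overlay(active, base)   == mid
 *     bdrv_find_overlay(active, NULL)   == base   (the bottommost image)
 *     bdrv_find_overlay(active, active) == NULL
 */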
2574BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2575 BlockDriverState *bs)
2576{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002577 while (active && bs != active->backing_hd) {
2578 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002579 }
2580
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002581 return active;
2582}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002583
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002584/* Given a BDS, searches for the base layer. */
2585BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2586{
2587 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002588}
2589
2590typedef struct BlkIntermediateStates {
2591 BlockDriverState *bs;
2592 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2593} BlkIntermediateStates;
2594
2595
2596/*
2597 * Drops images above 'base' up to and including 'top', and sets the image
2598 * above 'top' to have base as its backing file.
2599 *
2600 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2601 * information in 'bs' can be properly updated.
2602 *
2603 * E.g., this will convert the following chain:
2604 * bottom <- base <- intermediate <- top <- active
2605 *
2606 * to
2607 *
2608 * bottom <- base <- active
2609 *
2610 * It is allowed for bottom==base, in which case it converts:
2611 *
2612 * base <- intermediate <- top <- active
2613 *
2614 * to
2615 *
2616 * base <- active
2617 *
Jeff Cody54e26902014-06-25 15:40:10 -04002618 * If backing_file_str is non-NULL, it will be used when modifying top's
2619 * overlay image metadata.
2620 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002621 * Error conditions:
2622 * if active == top, that is considered an error
2623 *
2624 */
2625int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002626 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002627{
2628 BlockDriverState *intermediate;
2629 BlockDriverState *base_bs = NULL;
2630 BlockDriverState *new_top_bs = NULL;
2631 BlkIntermediateStates *intermediate_state, *next;
2632 int ret = -EIO;
2633
2634 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2635 QSIMPLEQ_INIT(&states_to_delete);
2636
2637 if (!top->drv || !base->drv) {
2638 goto exit;
2639 }
2640
2641 new_top_bs = bdrv_find_overlay(active, top);
2642
2643 if (new_top_bs == NULL) {
2644 /* we could not find the image above 'top', this is an error */
2645 goto exit;
2646 }
2647
2648 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2649 * to do, no intermediate images */
2650 if (new_top_bs->backing_hd == base) {
2651 ret = 0;
2652 goto exit;
2653 }
2654
2655 intermediate = top;
2656
2657 /* now we will go down through the list, and add each BDS we find
2658 * into our deletion queue, until we hit the 'base'
2659 */
2660 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002661 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002662 intermediate_state->bs = intermediate;
2663 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2664
2665 if (intermediate->backing_hd == base) {
2666 base_bs = intermediate->backing_hd;
2667 break;
2668 }
2669 intermediate = intermediate->backing_hd;
2670 }
2671 if (base_bs == NULL) {
 2672 /* something went wrong: we did not end at the base. Safely
 2673 * unravel everything and exit with an error */
2674 goto exit;
2675 }
2676
2677 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002678 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2679 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002680 base_bs->drv ? base_bs->drv->format_name : "");
2681 if (ret) {
2682 goto exit;
2683 }
Fam Zheng920beae2014-05-23 21:29:46 +08002684 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002685
2686 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2687 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002688 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002689 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002690 }
2691 ret = 0;
2692
2693exit:
2694 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2695 g_free(intermediate_state);
2696 }
2697 return ret;
2698}
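/*
 * Illustrative sketch (not part of the original file): collapsing the chain
 * base <- intermediate <- top <- active down to base <- active.  Passing NULL
 * as backing_file_str makes the function record base_bs->filename in the
 * overlay; the variable names are hypothetical.
 *
 *     int ret = bdrv_drop_intermediate(active, top, base, NULL);
 *     if (ret < 0) {
 *         // on failure the backing chain is left as it was
 *     }
 */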
2699
2700
aliguori71d07702009-03-03 17:37:16 +00002701static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2702 size_t size)
2703{
2704 int64_t len;
2705
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002706 if (size > INT_MAX) {
2707 return -EIO;
2708 }
2709
aliguori71d07702009-03-03 17:37:16 +00002710 if (!bdrv_is_inserted(bs))
2711 return -ENOMEDIUM;
2712
2713 if (bs->growable)
2714 return 0;
2715
2716 len = bdrv_getlength(bs);
2717
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002718 if (offset < 0)
2719 return -EIO;
2720
2721 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002722 return -EIO;
2723
2724 return 0;
2725}
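/*
 * Worked example (illustrative, not part of the original file): for an image
 * with len = 1024 bytes, a request with offset = 512 and size = 1024 fails
 * the check above because len - offset (512) is smaller than size.  Writing
 * it as (len - offset < size) rather than (offset + size > len) avoids an
 * integer overflow when offset and size are both large.
 */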
2726
2727static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2728 int nb_sectors)
2729{
Kevin Wolf54db38a2014-04-14 14:47:14 +02002730 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002731 return -EIO;
2732 }
2733
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002734 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2735 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002736}
2737
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002738typedef struct RwCo {
2739 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002740 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002741 QEMUIOVector *qiov;
2742 bool is_write;
2743 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002744 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002745} RwCo;
2746
2747static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2748{
2749 RwCo *rwco = opaque;
2750
2751 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002752 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2753 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002754 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002755 } else {
2756 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2757 rwco->qiov->size, rwco->qiov,
2758 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002759 }
2760}
2761
2762/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002763 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002764 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002765static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2766 QEMUIOVector *qiov, bool is_write,
2767 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002768{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002769 Coroutine *co;
2770 RwCo rwco = {
2771 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002772 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002773 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002774 .is_write = is_write,
2775 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002776 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002777 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002778
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002779 /**
2780 * In sync call context, when the vcpu is blocked, this throttling timer
2781 * will not fire; so the I/O throttling function has to be disabled here
2782 * if it has been enabled.
2783 */
2784 if (bs->io_limits_enabled) {
2785 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2786 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2787 bdrv_io_limits_disable(bs);
2788 }
2789
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002790 if (qemu_in_coroutine()) {
2791 /* Fast-path if already in coroutine context */
2792 bdrv_rw_co_entry(&rwco);
2793 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002794 AioContext *aio_context = bdrv_get_aio_context(bs);
2795
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002796 co = qemu_coroutine_create(bdrv_rw_co_entry);
2797 qemu_coroutine_enter(co, &rwco);
2798 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002799 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002800 }
2801 }
2802 return rwco.ret;
2803}
2804
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002805/*
2806 * Process a synchronous request using coroutines
2807 */
2808static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002809 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002810{
2811 QEMUIOVector qiov;
2812 struct iovec iov = {
2813 .iov_base = (void *)buf,
2814 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2815 };
2816
Kevin Wolfda15ee52014-04-14 15:39:36 +02002817 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2818 return -EINVAL;
2819 }
2820
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002821 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002822 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2823 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002824}
2825
bellard19cb3732006-08-19 11:45:59 +00002826/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002827int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002828 uint8_t *buf, int nb_sectors)
2829{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002830 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002831}
2832
Markus Armbruster07d27a42012-06-29 17:34:29 +02002833/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2834int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2835 uint8_t *buf, int nb_sectors)
2836{
2837 bool enabled;
2838 int ret;
2839
2840 enabled = bs->io_limits_enabled;
2841 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002842 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002843 bs->io_limits_enabled = enabled;
2844 return ret;
2845}
2846
ths5fafdf22007-09-16 21:08:06 +00002847/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002848 -EIO generic I/O error (may happen for all errors)
2849 -ENOMEDIUM No media inserted.
2850 -EINVAL Invalid sector number or nb_sectors
2851 -EACCES Trying to write a read-only device
2852*/
ths5fafdf22007-09-16 21:08:06 +00002853int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002854 const uint8_t *buf, int nb_sectors)
2855{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002856 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002857}
2858
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002859int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2860 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002861{
2862 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002863 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002864}
2865
Peter Lievend75cbb52013-10-24 12:07:03 +02002866/*
2867 * Completely zero out a block device with the help of bdrv_write_zeroes.
2868 * The operation is sped up by checking the block status and only writing
2869 * zeroes to the device if they currently do not return zeroes. Optional
2870 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2871 *
2872 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2873 */
2874int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2875{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002876 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002877 int n;
2878
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002879 target_sectors = bdrv_nb_sectors(bs);
2880 if (target_sectors < 0) {
2881 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002882 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002883
Peter Lievend75cbb52013-10-24 12:07:03 +02002884 for (;;) {
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002885 nb_sectors = target_sectors - sector_num;
Peter Lievend75cbb52013-10-24 12:07:03 +02002886 if (nb_sectors <= 0) {
2887 return 0;
2888 }
2889 if (nb_sectors > INT_MAX) {
2890 nb_sectors = INT_MAX;
2891 }
2892 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002893 if (ret < 0) {
2894 error_report("error getting block status at sector %" PRId64 ": %s",
2895 sector_num, strerror(-ret));
2896 return ret;
2897 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002898 if (ret & BDRV_BLOCK_ZERO) {
2899 sector_num += n;
2900 continue;
2901 }
2902 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2903 if (ret < 0) {
2904 error_report("error writing zeroes at sector %" PRId64 ": %s",
2905 sector_num, strerror(-ret));
2906 return ret;
2907 }
2908 sector_num += n;
2909 }
2910}
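/*
 * Illustrative sketch (not part of the original file): fully zeroing a device
 * while letting the driver unmap the zeroed ranges where it can.
 *
 *     int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         // see bdrv_write() for the possible error codes
 *     }
 */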
2911
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002912int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002913{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002914 QEMUIOVector qiov;
2915 struct iovec iov = {
2916 .iov_base = (void *)buf,
2917 .iov_len = bytes,
2918 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002919 int ret;
bellard83f64092006-08-01 16:21:11 +00002920
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002921 if (bytes < 0) {
2922 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002923 }
2924
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002925 qemu_iovec_init_external(&qiov, &iov, 1);
2926 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2927 if (ret < 0) {
2928 return ret;
bellard83f64092006-08-01 16:21:11 +00002929 }
2930
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002931 return bytes;
bellard83f64092006-08-01 16:21:11 +00002932}
2933
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002934int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002935{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002936 int ret;
bellard83f64092006-08-01 16:21:11 +00002937
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002938 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2939 if (ret < 0) {
2940 return ret;
bellard83f64092006-08-01 16:21:11 +00002941 }
2942
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002943 return qiov->size;
2944}
2945
2946int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002947 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002948{
2949 QEMUIOVector qiov;
2950 struct iovec iov = {
2951 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002952 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002953 };
2954
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002955 if (bytes < 0) {
2956 return -EINVAL;
2957 }
2958
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002959 qemu_iovec_init_external(&qiov, &iov, 1);
2960 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002961}
bellard83f64092006-08-01 16:21:11 +00002962
Kevin Wolff08145f2010-06-16 16:38:15 +02002963/*
2964 * Writes to the file and ensures that no writes are reordered across this
2965 * request (acts as a barrier)
2966 *
2967 * Returns 0 on success, -errno in error cases.
2968 */
2969int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2970 const void *buf, int count)
2971{
2972 int ret;
2973
2974 ret = bdrv_pwrite(bs, offset, buf, count);
2975 if (ret < 0) {
2976 return ret;
2977 }
2978
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002979 /* No flush needed for cache modes that already do it */
2980 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002981 bdrv_flush(bs);
2982 }
2983
2984 return 0;
2985}
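/*
 * Illustrative sketch (not part of the original file): updating on-disk
 * metadata that later writes must not overtake, e.g. an image header.  The
 * 'header' structure and its offset are hypothetical.
 *
 *     ret = bdrv_pwrite_sync(bs, 0, &header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;   // the write or the implied flush failed
 *     }
 */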
2986
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002987static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002988 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2989{
2990 /* Perform I/O through a temporary buffer so that users who scribble over
2991 * their read buffer while the operation is in progress do not end up
2992 * modifying the image file. This is critical for zero-copy guest I/O
2993 * where anything might happen inside guest memory.
2994 */
2995 void *bounce_buffer;
2996
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002997 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002998 struct iovec iov;
2999 QEMUIOVector bounce_qiov;
3000 int64_t cluster_sector_num;
3001 int cluster_nb_sectors;
3002 size_t skip_bytes;
3003 int ret;
3004
3005 /* Cover entire cluster so no additional backing file I/O is required when
 3006 * allocating a cluster in the image file.
3007 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01003008 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3009 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003010
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003011 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3012 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003013
3014 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02003015 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3016 if (bounce_buffer == NULL) {
3017 ret = -ENOMEM;
3018 goto err;
3019 }
3020
Stefan Hajnocziab185922011-11-17 13:40:31 +00003021 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3022
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003023 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3024 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003025 if (ret < 0) {
3026 goto err;
3027 }
3028
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003029 if (drv->bdrv_co_write_zeroes &&
3030 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003031 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003032 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003033 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003034 /* This does not change the data on the disk, it is not necessary
3035 * to flush even in cache=writethrough mode.
3036 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003037 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003038 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003039 }
3040
Stefan Hajnocziab185922011-11-17 13:40:31 +00003041 if (ret < 0) {
3042 /* It might be okay to ignore write errors for guest requests. If this
3043 * is a deliberate copy-on-read then we don't want to ignore the error.
3044 * Simply report it in all cases.
3045 */
3046 goto err;
3047 }
3048
3049 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003050 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3051 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003052
3053err:
3054 qemu_vfree(bounce_buffer);
3055 return ret;
3056}
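/*
 * Worked example (illustrative, not part of the original file): with a 64 KiB
 * cluster size (128 sectors), a copy-on-read of sectors 130..135 is widened
 * by bdrv_round_to_clusters() to the cluster-aligned range 128..255.  The
 * whole cluster is read into the bounce buffer and written back, and only the
 * six requested sectors are then copied into the caller's qiov via skip_bytes.
 */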
3057
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003058/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003059 * Forwards an already correctly aligned request to the BlockDriver. This
3060 * handles copy on read and zeroing after EOF; any other features must be
3061 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003062 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003063static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003064 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003065 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003066{
3067 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003068 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003069
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003070 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3071 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003072
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003073 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3074 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003075 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003076
3077 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003078 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003079 /* If we touch the same cluster it counts as an overlap. This
3080 * guarantees that allocating writes will be serialized and not race
3081 * with each other for the same cluster. For example, in copy-on-read
3082 * it ensures that the CoR read and write operations are atomic and
3083 * guest writes cannot interleave between them. */
3084 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003085 }
3086
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003087 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003088
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003089 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003090 int pnum;
3091
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003092 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003093 if (ret < 0) {
3094 goto out;
3095 }
3096
3097 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003098 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003099 goto out;
3100 }
3101 }
3102
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003103 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003104 if (!(bs->zero_beyond_eof && bs->growable)) {
3105 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3106 } else {
 3107 /* Read zeroes after EOF of growable BDSes */
Markus Armbruster40490822014-06-26 13:23:19 +02003108 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003109
Markus Armbruster40490822014-06-26 13:23:19 +02003110 total_sectors = bdrv_nb_sectors(bs);
3111 if (total_sectors < 0) {
3112 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003113 goto out;
3114 }
3115
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003116 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3117 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003118 if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003119 QEMUIOVector local_qiov;
3120 size_t local_sectors;
3121
 3122 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_SIZE);
3123 local_sectors = MIN(max_nb_sectors, nb_sectors);
3124
3125 qemu_iovec_init(&local_qiov, qiov->niov);
3126 qemu_iovec_concat(&local_qiov, qiov, 0,
3127 local_sectors * BDRV_SECTOR_SIZE);
3128
3129 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3130 &local_qiov);
3131
3132 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003133 } else {
3134 ret = 0;
3135 }
3136
3137 /* Reading beyond end of file is supposed to produce zeroes */
3138 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3139 uint64_t offset = MAX(0, total_sectors - sector_num);
3140 uint64_t bytes = (sector_num + nb_sectors - offset) *
3141 BDRV_SECTOR_SIZE;
3142 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3143 }
3144 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003145
3146out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003147 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003148}
3149
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003150/*
3151 * Handle a read request in coroutine context
3152 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003153static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3154 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003155 BdrvRequestFlags flags)
3156{
3157 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003158 BdrvTrackedRequest req;
3159
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003160 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3161 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3162 uint8_t *head_buf = NULL;
3163 uint8_t *tail_buf = NULL;
3164 QEMUIOVector local_qiov;
3165 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003166 int ret;
3167
3168 if (!drv) {
3169 return -ENOMEDIUM;
3170 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003171 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003172 return -EIO;
3173 }
3174
3175 if (bs->copy_on_read) {
3176 flags |= BDRV_REQ_COPY_ON_READ;
3177 }
3178
3179 /* throttling disk I/O */
3180 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003181 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003182 }
3183
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003184 /* Align read if necessary by padding qiov */
3185 if (offset & (align - 1)) {
3186 head_buf = qemu_blockalign(bs, align);
3187 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3188 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3189 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3190 use_local_qiov = true;
3191
3192 bytes += offset & (align - 1);
3193 offset = offset & ~(align - 1);
3194 }
3195
3196 if ((offset + bytes) & (align - 1)) {
3197 if (!use_local_qiov) {
3198 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3199 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3200 use_local_qiov = true;
3201 }
3202 tail_buf = qemu_blockalign(bs, align);
3203 qemu_iovec_add(&local_qiov, tail_buf,
3204 align - ((offset + bytes) & (align - 1)));
3205
3206 bytes = ROUND_UP(bytes, align);
3207 }
3208
Kevin Wolf65afd212013-12-03 14:55:55 +01003209 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003210 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003211 use_local_qiov ? &local_qiov : qiov,
3212 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003213 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003214
3215 if (use_local_qiov) {
3216 qemu_iovec_destroy(&local_qiov);
3217 qemu_vfree(head_buf);
3218 qemu_vfree(tail_buf);
3219 }
3220
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003221 return ret;
3222}
3223
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003224static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3225 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3226 BdrvRequestFlags flags)
3227{
3228 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3229 return -EINVAL;
3230 }
3231
3232 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3233 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3234}
3235
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003236int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003237 int nb_sectors, QEMUIOVector *qiov)
3238{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003239 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003240
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003241 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3242}
3243
3244int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3245 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3246{
3247 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3248
3249 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3250 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003251}
3252
Peter Lievenc31cb702013-10-24 12:06:58 +02003253/* If no limit is specified in the BlockLimits, use a default
3254 * of 32768 512-byte sectors (16 MiB) per request.
3255 */
3256#define MAX_WRITE_ZEROES_DEFAULT 32768
3257
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003258static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003259 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003260{
3261 BlockDriver *drv = bs->drv;
3262 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003263 struct iovec iov = {0};
3264 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003265
Peter Lievenc31cb702013-10-24 12:06:58 +02003266 int max_write_zeroes = bs->bl.max_write_zeroes ?
3267 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003268
Peter Lievenc31cb702013-10-24 12:06:58 +02003269 while (nb_sectors > 0 && !ret) {
3270 int num = nb_sectors;
3271
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003272 /* Align request. Block drivers can expect the "bulk" of the request
3273 * to be aligned.
3274 */
3275 if (bs->bl.write_zeroes_alignment
3276 && num > bs->bl.write_zeroes_alignment) {
3277 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3278 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003279 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003280 num -= sector_num % bs->bl.write_zeroes_alignment;
3281 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3282 /* Shorten the request to the last aligned sector. num cannot
3283 * underflow because num > bs->bl.write_zeroes_alignment.
3284 */
3285 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003286 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003287 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003288
3289 /* limit request size */
3290 if (num > max_write_zeroes) {
3291 num = max_write_zeroes;
3292 }
3293
3294 ret = -ENOTSUP;
3295 /* First try the efficient write zeroes operation */
3296 if (drv->bdrv_co_write_zeroes) {
3297 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3298 }
3299
3300 if (ret == -ENOTSUP) {
3301 /* Fall back to bounce buffer if write zeroes is unsupported */
3302 iov.iov_len = num * BDRV_SECTOR_SIZE;
3303 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003304 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3305 if (iov.iov_base == NULL) {
3306 ret = -ENOMEM;
3307 goto fail;
3308 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003309 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003310 }
3311 qemu_iovec_init_external(&qiov, &iov, 1);
3312
3313 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003314
 3315 /* Keep bounce buffer around if it is big enough for
 3316 * all future requests.
3317 */
3318 if (num < max_write_zeroes) {
3319 qemu_vfree(iov.iov_base);
3320 iov.iov_base = NULL;
3321 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003322 }
3323
3324 sector_num += num;
3325 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003326 }
3327
Kevin Wolf857d4f42014-05-20 13:16:51 +02003328fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003329 qemu_vfree(iov.iov_base);
3330 return ret;
3331}
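/*
 * Worked example (illustrative, not part of the original file): with
 * bs->bl.write_zeroes_alignment = 8 and a request for sectors 5..26
 * (sector_num = 5, nb_sectors = 22), the loop above issues three driver
 * calls: sectors 5..7 (3-sector head), 8..23 (16 aligned sectors) and
 * 24..26 (3-sector tail, left unaligned because num <= alignment).
 */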
3332
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003333/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003334 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003335 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003336static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003337 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3338 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003339{
3340 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003341 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003342 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003343
Kevin Wolfb404f722013-12-03 14:02:23 +01003344 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3345 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003346
Kevin Wolfb404f722013-12-03 14:02:23 +01003347 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3348 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003349 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003350
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003351 waited = wait_serialising_requests(req);
3352 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003353 assert(req->overlap_offset <= offset);
3354 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003355
Kevin Wolf65afd212013-12-03 14:55:55 +01003356 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003357
Peter Lieven465bee12014-05-18 00:58:19 +02003358 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3359 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3360 qemu_iovec_is_zero(qiov)) {
3361 flags |= BDRV_REQ_ZERO_WRITE;
3362 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3363 flags |= BDRV_REQ_MAY_UNMAP;
3364 }
3365 }
3366
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003367 if (ret < 0) {
3368 /* Do nothing, write notifier decided to fail this request */
3369 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003370 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003371 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003372 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003373 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003374 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3375 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003376 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003377
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003378 if (ret == 0 && !bs->enable_write_cache) {
3379 ret = bdrv_co_flush(bs);
3380 }
3381
Fam Zhenge4654d22013-11-13 18:29:43 +08003382 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003383
Benoît Canet5366d0c2014-09-05 15:46:18 +02003384 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003385
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003386 if (bs->growable && ret >= 0) {
3387 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3388 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003389
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003390 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003391}
3392
Kevin Wolfb404f722013-12-03 14:02:23 +01003393/*
3394 * Handle a write request in coroutine context
3395 */
Kevin Wolf66015532013-12-03 14:40:18 +01003396static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3397 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003398 BdrvRequestFlags flags)
3399{
Kevin Wolf65afd212013-12-03 14:55:55 +01003400 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003401 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3402 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3403 uint8_t *head_buf = NULL;
3404 uint8_t *tail_buf = NULL;
3405 QEMUIOVector local_qiov;
3406 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003407 int ret;
3408
3409 if (!bs->drv) {
3410 return -ENOMEDIUM;
3411 }
3412 if (bs->read_only) {
3413 return -EACCES;
3414 }
Kevin Wolf66015532013-12-03 14:40:18 +01003415 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003416 return -EIO;
3417 }
3418
Kevin Wolfb404f722013-12-03 14:02:23 +01003419 /* throttling disk I/O */
3420 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003421 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003422 }
3423
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003424 /*
3425 * Align write if necessary by performing a read-modify-write cycle.
3426 * Pad qiov with the read parts and be sure to have a tracked request not
3427 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3428 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003429 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003430
3431 if (offset & (align - 1)) {
3432 QEMUIOVector head_qiov;
3433 struct iovec head_iov;
3434
3435 mark_request_serialising(&req, align);
3436 wait_serialising_requests(&req);
3437
3438 head_buf = qemu_blockalign(bs, align);
3439 head_iov = (struct iovec) {
3440 .iov_base = head_buf,
3441 .iov_len = align,
3442 };
3443 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3444
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003445 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003446 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3447 align, &head_qiov, 0);
3448 if (ret < 0) {
3449 goto fail;
3450 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003451 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003452
3453 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3454 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3455 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3456 use_local_qiov = true;
3457
3458 bytes += offset & (align - 1);
3459 offset = offset & ~(align - 1);
3460 }
3461
3462 if ((offset + bytes) & (align - 1)) {
3463 QEMUIOVector tail_qiov;
3464 struct iovec tail_iov;
3465 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003466 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003467
3468 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003469 waited = wait_serialising_requests(&req);
3470 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003471
3472 tail_buf = qemu_blockalign(bs, align);
3473 tail_iov = (struct iovec) {
3474 .iov_base = tail_buf,
3475 .iov_len = align,
3476 };
3477 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3478
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003479 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003480 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3481 align, &tail_qiov, 0);
3482 if (ret < 0) {
3483 goto fail;
3484 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003485 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003486
3487 if (!use_local_qiov) {
3488 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3489 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3490 use_local_qiov = true;
3491 }
3492
3493 tail_bytes = (offset + bytes) & (align - 1);
3494 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3495
3496 bytes = ROUND_UP(bytes, align);
3497 }
3498
3499 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3500 use_local_qiov ? &local_qiov : qiov,
3501 flags);
3502
3503fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003504 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003505
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003506 if (use_local_qiov) {
3507 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003508 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003509 qemu_vfree(head_buf);
3510 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003511
Kevin Wolfb404f722013-12-03 14:02:23 +01003512 return ret;
3513}
3514
Kevin Wolf66015532013-12-03 14:40:18 +01003515static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3516 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3517 BdrvRequestFlags flags)
3518{
3519 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3520 return -EINVAL;
3521 }
3522
3523 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3524 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3525}
3526
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003527int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3528 int nb_sectors, QEMUIOVector *qiov)
3529{
3530 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3531
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003532 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3533}
3534
3535int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003536 int64_t sector_num, int nb_sectors,
3537 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003538{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003539 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003540
Peter Lievend32f35c2013-10-24 12:06:52 +02003541 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3542 flags &= ~BDRV_REQ_MAY_UNMAP;
3543 }
3544
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003545 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003546 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003547}
3548
bellard83f64092006-08-01 16:21:11 +00003549/**
bellard83f64092006-08-01 16:21:11 +00003550 * Truncate file to 'offset' bytes (needed only for file protocols)
3551 */
3552int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3553{
3554 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003555 int ret;
bellard83f64092006-08-01 16:21:11 +00003556 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003557 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003558 if (!drv->bdrv_truncate)
3559 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003560 if (bs->read_only)
3561 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003562
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003563 ret = drv->bdrv_truncate(bs, offset);
3564 if (ret == 0) {
3565 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003566 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003567 }
3568 return ret;
bellard83f64092006-08-01 16:21:11 +00003569}
3570
3571/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003572 * Length of an allocated file in bytes. Sparse files are counted by actual
3573 * allocated space. Return < 0 if error or unknown.
3574 */
3575int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3576{
3577 BlockDriver *drv = bs->drv;
3578 if (!drv) {
3579 return -ENOMEDIUM;
3580 }
3581 if (drv->bdrv_get_allocated_file_size) {
3582 return drv->bdrv_get_allocated_file_size(bs);
3583 }
3584 if (bs->file) {
3585 return bdrv_get_allocated_file_size(bs->file);
3586 }
3587 return -ENOTSUP;
3588}
3589
3590/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003591 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003592 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003593int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003594{
3595 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003596
bellard83f64092006-08-01 16:21:11 +00003597 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003598 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003599
Kevin Wolfb94a2612013-10-29 12:18:58 +01003600 if (drv->has_variable_length) {
3601 int ret = refresh_total_sectors(bs, bs->total_sectors);
3602 if (ret < 0) {
3603 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003604 }
bellard83f64092006-08-01 16:21:11 +00003605 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003606 return bs->total_sectors;
3607}
3608
3609/**
3610 * Return length in bytes on success, -errno on error.
3611 * The length is always a multiple of BDRV_SECTOR_SIZE.
3612 */
3613int64_t bdrv_getlength(BlockDriverState *bs)
3614{
3615 int64_t ret = bdrv_nb_sectors(bs);
3616
3617 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003618}
3619
bellard19cb3732006-08-19 11:45:59 +00003620/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003621void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003622{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003623 int64_t nb_sectors = bdrv_nb_sectors(bs);
3624
3625 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003626}
bellardcf989512004-02-16 21:56:36 +00003627
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003628void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3629 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003630{
3631 bs->on_read_error = on_read_error;
3632 bs->on_write_error = on_write_error;
3633}
3634
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003635BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003636{
3637 return is_read ? bs->on_read_error : bs->on_write_error;
3638}
3639
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003640BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3641{
3642 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3643
3644 switch (on_err) {
3645 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003646 return (error == ENOSPC) ?
3647 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003648 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003649 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003650 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003651 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003652 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003653 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003654 default:
3655 abort();
3656 }
3657}
3658
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003659static void send_qmp_error_event(BlockDriverState *bs,
3660 BlockErrorAction action,
3661 bool is_read, int error)
3662{
 3663 IoOperationType ac;
3664
3665 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3666 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3667 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003668 error == ENOSPC, strerror(error),
3669 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003670}
3671
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003672/* This is done by device models because, while the block layer knows
3673 * about the error, it does not know whether an operation comes from
3674 * the device or the block layer (from a job, for example).
3675 */
3676void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3677 bool is_read, int error)
3678{
3679 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003680
Wenchao Xiaa5895692014-06-18 08:43:30 +02003681 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003682 /* First set the iostatus, so that "info block" returns an iostatus
3683 * that matches the events raised so far (an additional error iostatus
3684 * is fine, but not a lost one).
3685 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003686 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003687
3688 /* Then raise the request to stop the VM and the event.
3689 * qemu_system_vmstop_request_prepare has two effects. First,
3690 * it ensures that the STOP event always comes after the
3691 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3692 * can observe the STOP event and do a "cont" before the STOP
3693 * event is issued, the VM will not stop. In this case, vm_start()
3694 * also ensures that the STOP/RESUME pair of events is emitted.
3695 */
3696 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003697 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003698 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3699 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003700 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003701 }
3702}
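/*
 * Illustrative sketch (not part of the original file): how a device model
 * might react to a request that completed with a negative errno in 'ret'.
 *
 *     BlockErrorAction action = bdrv_get_error_action(bs, is_read, -ret);
 *     bdrv_error_action(bs, action, is_read, -ret);
 *     if (action == BLOCK_ERROR_ACTION_REPORT) {
 *         // complete the request towards the guest with an error status
 *     }
 */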
3703
bellardb3380822004-03-14 21:38:54 +00003704int bdrv_is_read_only(BlockDriverState *bs)
3705{
3706 return bs->read_only;
3707}
3708
ths985a03b2007-12-24 16:10:43 +00003709int bdrv_is_sg(BlockDriverState *bs)
3710{
3711 return bs->sg;
3712}
3713
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003714int bdrv_enable_write_cache(BlockDriverState *bs)
3715{
3716 return bs->enable_write_cache;
3717}
3718
Paolo Bonzini425b0142012-06-06 00:04:52 +02003719void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3720{
3721 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003722
3723 /* so a reopen() will preserve wce */
3724 if (wce) {
3725 bs->open_flags |= BDRV_O_CACHE_WB;
3726 } else {
3727 bs->open_flags &= ~BDRV_O_CACHE_WB;
3728 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003729}
3730
bellardea2384d2004-08-01 21:59:26 +00003731int bdrv_is_encrypted(BlockDriverState *bs)
3732{
3733 if (bs->backing_hd && bs->backing_hd->encrypted)
3734 return 1;
3735 return bs->encrypted;
3736}
3737
aliguoric0f4ce72009-03-05 23:01:01 +00003738int bdrv_key_required(BlockDriverState *bs)
3739{
3740 BlockDriverState *backing_hd = bs->backing_hd;
3741
3742 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3743 return 1;
3744 return (bs->encrypted && !bs->valid_key);
3745}
3746
bellardea2384d2004-08-01 21:59:26 +00003747int bdrv_set_key(BlockDriverState *bs, const char *key)
3748{
3749 int ret;
3750 if (bs->backing_hd && bs->backing_hd->encrypted) {
3751 ret = bdrv_set_key(bs->backing_hd, key);
3752 if (ret < 0)
3753 return ret;
3754 if (!bs->encrypted)
3755 return 0;
3756 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003757 if (!bs->encrypted) {
3758 return -EINVAL;
3759 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3760 return -ENOMEDIUM;
3761 }
aliguoric0f4ce72009-03-05 23:01:01 +00003762 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003763 if (ret < 0) {
3764 bs->valid_key = 0;
3765 } else if (!bs->valid_key) {
3766 bs->valid_key = 1;
3767 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003768 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003769 }
aliguoric0f4ce72009-03-05 23:01:01 +00003770 return ret;
bellardea2384d2004-08-01 21:59:26 +00003771}
3772
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003773const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003774{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003775 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003776}
3777
Stefan Hajnocziada42402014-08-27 12:08:55 +01003778static int qsort_strcmp(const void *a, const void *b)
3779{
3780 return strcmp(a, b);
3781}
3782
ths5fafdf22007-09-16 21:08:06 +00003783void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003784 void *opaque)
3785{
3786 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003787 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003788 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003789 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003790
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003791 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003792 if (drv->format_name) {
3793 bool found = false;
3794 int i = count;
3795 while (formats && i && !found) {
3796 found = !strcmp(formats[--i], drv->format_name);
3797 }
3798
3799 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003800 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003801 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003802 }
3803 }
bellardea2384d2004-08-01 21:59:26 +00003804 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003805
3806 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3807
3808 for (i = 0; i < count; i++) {
3809 it(opaque, formats[i]);
3810 }
3811
Jeff Codye855e4f2014-04-28 18:29:54 -04003812 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003813}
3814
Benoît Canetdc364f42014-01-23 21:31:32 +01003815/* This function looks up a block backend BDS by its device name */
bellardb3380822004-03-14 21:38:54 +00003816BlockDriverState *bdrv_find(const char *name)
3817{
3818 BlockDriverState *bs;
3819
Benoît Canetdc364f42014-01-23 21:31:32 +01003820 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003821 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003822 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003823 }
bellardb3380822004-03-14 21:38:54 +00003824 }
3825 return NULL;
3826}
3827
Benoît Canetdc364f42014-01-23 21:31:32 +01003828/* This function looks up a node in the BDS graph by its node name */
3829BlockDriverState *bdrv_find_node(const char *node_name)
3830{
3831 BlockDriverState *bs;
3832
3833 assert(node_name);
3834
3835 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3836 if (!strcmp(node_name, bs->node_name)) {
3837 return bs;
3838 }
3839 }
3840 return NULL;
3841}
3842
Benoît Canetc13163f2014-01-23 21:31:34 +01003843/* Put this QMP function here so it can access the static graph_bdrv_states. */
3844BlockDeviceInfoList *bdrv_named_nodes_list(void)
3845{
3846 BlockDeviceInfoList *list, *entry;
3847 BlockDriverState *bs;
3848
3849 list = NULL;
3850 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3851 entry = g_malloc0(sizeof(*entry));
3852 entry->value = bdrv_block_device_info(bs);
3853 entry->next = list;
3854 list = entry;
3855 }
3856
3857 return list;
3858}
3859
Benoît Canet12d3ba82014-01-23 21:31:35 +01003860BlockDriverState *bdrv_lookup_bs(const char *device,
3861 const char *node_name,
3862 Error **errp)
3863{
3864 BlockDriverState *bs = NULL;
3865
Benoît Canet12d3ba82014-01-23 21:31:35 +01003866 if (device) {
3867 bs = bdrv_find(device);
3868
Benoît Canetdd67fa52014-02-12 17:15:06 +01003869 if (bs) {
3870 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003871 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003872 }
3873
Benoît Canetdd67fa52014-02-12 17:15:06 +01003874 if (node_name) {
3875 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003876
Benoît Canetdd67fa52014-02-12 17:15:06 +01003877 if (bs) {
3878 return bs;
3879 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003880 }
3881
Benoît Canetdd67fa52014-02-12 17:15:06 +01003882 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3883 device ? device : "",
3884 node_name ? node_name : "");
3885 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003886}
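/*
 * Illustrative sketch (not part of the original file): resolving a reference
 * that may name either a block backend or a graph node; "drive0" is a
 * hypothetical device name.
 *
 *     Error *local_err = NULL;
 *     BlockDriverState *bs = bdrv_lookup_bs("drive0", NULL, &local_err);
 *     if (!bs) {
 *         error_propagate(errp, local_err);
 *         return;
 *     }
 */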
3887
Jeff Cody5a6684d2014-06-25 15:40:09 -04003888/* If 'base' is in the same chain as 'top', return true. Otherwise,
3889 * return false. If either argument is NULL, return false. */
3890bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3891{
3892 while (top && top != base) {
3893 top = top->backing_hd;
3894 }
3895
3896 return top != NULL;
3897}
3898
Markus Armbruster2f399b02010-06-02 18:55:20 +02003899BlockDriverState *bdrv_next(BlockDriverState *bs)
3900{
3901 if (!bs) {
3902 return QTAILQ_FIRST(&bdrv_states);
3903 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003904 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003905}
3906
aliguori51de9762009-03-05 23:00:43 +00003907void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003908{
3909 BlockDriverState *bs;
3910
Benoît Canetdc364f42014-01-23 21:31:32 +01003911 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003912 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003913 }
3914}
3915
bellardea2384d2004-08-01 21:59:26 +00003916const char *bdrv_get_device_name(BlockDriverState *bs)
3917{
3918 return bs->device_name;
3919}
3920
Markus Armbrusterc8433282012-06-05 16:49:24 +02003921int bdrv_get_flags(BlockDriverState *bs)
3922{
3923 return bs->open_flags;
3924}
3925
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003926int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003927{
3928 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003929 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003930
Benoît Canetdc364f42014-01-23 21:31:32 +01003931 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003932 AioContext *aio_context = bdrv_get_aio_context(bs);
3933 int ret;
3934
3935 aio_context_acquire(aio_context);
3936 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003937 if (ret < 0 && !result) {
3938 result = ret;
3939 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003940 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003941 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003942
3943 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003944}
3945
Peter Lieven3ac21622013-06-28 12:47:42 +02003946int bdrv_has_zero_init_1(BlockDriverState *bs)
3947{
3948 return 1;
3949}
3950
Kevin Wolff2feebb2010-04-14 17:30:35 +02003951int bdrv_has_zero_init(BlockDriverState *bs)
3952{
3953 assert(bs->drv);
3954
Paolo Bonzini11212d82013-09-04 19:00:27 +02003955 /* If BS is a copy on write image, it is initialized to
3956 the contents of the base image, which may not be zeroes. */
3957 if (bs->backing_hd) {
3958 return 0;
3959 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003960 if (bs->drv->bdrv_has_zero_init) {
3961 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003962 }
3963
Peter Lieven3ac21622013-06-28 12:47:42 +02003964 /* safe default */
3965 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003966}
3967
Peter Lieven4ce78692013-10-24 12:06:54 +02003968bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3969{
3970 BlockDriverInfo bdi;
3971
3972 if (bs->backing_hd) {
3973 return false;
3974 }
3975
3976 if (bdrv_get_info(bs, &bdi) == 0) {
3977 return bdi.unallocated_blocks_are_zero;
3978 }
3979
3980 return false;
3981}
3982
3983bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3984{
3985 BlockDriverInfo bdi;
3986
3987 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3988 return false;
3989 }
3990
3991 if (bdrv_get_info(bs, &bdi) == 0) {
3992 return bdi.can_write_zeroes_with_unmap;
3993 }
3994
3995 return false;
3996}
3997
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003998typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003999 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01004000 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004001 int64_t sector_num;
4002 int nb_sectors;
4003 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004004 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004005 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004006} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004007
thsf58c7b32008-06-05 21:53:49 +00004008/*
 4009 * Returns the allocation status of the specified sectors (BDRV_BLOCK_* flags). Drivers
4010 * not implementing the functionality are assumed to not support backing files,
4011 * hence all their sectors are reported as allocated.
4012 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004013 * If 'sector_num' is beyond the end of the disk image the return value is 0
4014 * and 'pnum' is set to 0.
4015 *
thsf58c7b32008-06-05 21:53:49 +00004016 * 'pnum' is set to the number of sectors (including and immediately following
4017 * the specified sector) that are known to be in the same
4018 * allocated/unallocated state.
4019 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004020 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4021 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00004022 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004023static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4024 int64_t sector_num,
4025 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00004026{
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004027 int64_t total_sectors;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004028 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004029 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004030
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004031 total_sectors = bdrv_nb_sectors(bs);
4032 if (total_sectors < 0) {
4033 return total_sectors;
Paolo Bonzini617ccb42013-09-04 19:00:23 +02004034 }
4035
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004036 if (sector_num >= total_sectors) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004037 *pnum = 0;
4038 return 0;
4039 }
4040
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004041 n = total_sectors - sector_num;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004042 if (n < nb_sectors) {
4043 nb_sectors = n;
4044 }
4045
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004046 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004047 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02004048 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02004049 if (bs->drv->protocol_name) {
4050 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4051 }
4052 return ret;
thsf58c7b32008-06-05 21:53:49 +00004053 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004054
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004055 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4056 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02004057 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004058 return ret;
4059 }
4060
Peter Lieven92bc50a2013-10-08 14:43:14 +02004061 if (ret & BDRV_BLOCK_RAW) {
4062 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4063 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4064 *pnum, pnum);
4065 }
4066
Kevin Wolfe88ae222014-05-06 15:25:36 +02004067 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4068 ret |= BDRV_BLOCK_ALLOCATED;
4069 }
4070
Peter Lievenc3d86882013-10-24 12:07:04 +02004071 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4072 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004073 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02004074 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004075 BlockDriverState *bs2 = bs->backing_hd;
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004076 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4077 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004078 ret |= BDRV_BLOCK_ZERO;
4079 }
4080 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004081 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004082
4083 if (bs->file &&
4084 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4085 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4086 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4087 *pnum, pnum);
4088 if (ret2 >= 0) {
4089 /* Ignore errors. This is just providing extra information, it
4090 * is useful but not necessary.
4091 */
4092 ret |= (ret2 & BDRV_BLOCK_ZERO);
4093 }
4094 }
4095
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004096 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004097}
4098
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004099/* Coroutine wrapper for bdrv_get_block_status() */
4100static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004101{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004102 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004103 BlockDriverState *bs = data->bs;
4104
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004105 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4106 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004107 data->done = true;
4108}
4109
4110/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004111 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004112 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004113 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004114 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004115int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4116 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004117{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004118 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004119 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004120 .bs = bs,
4121 .sector_num = sector_num,
4122 .nb_sectors = nb_sectors,
4123 .pnum = pnum,
4124 .done = false,
4125 };
4126
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004127 if (qemu_in_coroutine()) {
4128 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004129 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004130 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004131 AioContext *aio_context = bdrv_get_aio_context(bs);
4132
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004133 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004134 qemu_coroutine_enter(co, &data);
4135 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004136 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004137 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004138 }
4139 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004140}
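
/*
 * Illustrative sketch (hypothetical usage): scanning an image with
 * bdrv_get_block_status().  '*pnum' reports how many sectors share the
 * returned state, so the loop advances in variable-sized steps.  Variable
 * names and the chunk size are made up for the example:
 *
 *     int64_t sector = 0, allocated = 0, total = bdrv_nb_sectors(bs);
 *     while (sector < total) {
 *         int n;
 *         int64_t ret = bdrv_get_block_status(bs, sector,
 *                                             MIN(total - sector, 65536), &n);
 *         if (ret < 0) {
 *             break;
 *         }
 *         if (ret & BDRV_BLOCK_ALLOCATED) {
 *             allocated += n;
 *         }
 *         sector += n;
 *     }
 */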
4141
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004142int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4143 int nb_sectors, int *pnum)
4144{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004145 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4146 if (ret < 0) {
4147 return ret;
4148 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004149 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004150}
4151
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004152/*
4153 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4154 *
4155 * Return true if the given sector is allocated in any image between
4156 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4157 * sector is allocated in any image of the chain. Return false otherwise.
4158 *
4159 * 'pnum' is set to the number of sectors (including and immediately following
4160 * the specified sector) that are known to be in the same
4161 * allocated/unallocated state.
4162 *
4163 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004164int bdrv_is_allocated_above(BlockDriverState *top,
4165 BlockDriverState *base,
4166 int64_t sector_num,
4167 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004168{
4169 BlockDriverState *intermediate;
4170 int ret, n = nb_sectors;
4171
4172 intermediate = top;
4173 while (intermediate && intermediate != base) {
4174 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004175 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4176 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004177 if (ret < 0) {
4178 return ret;
4179 } else if (ret) {
4180 *pnum = pnum_inter;
4181 return 1;
4182 }
4183
4184 /*
4185 * [sector_num, nb_sectors] is unallocated on top but intermediate
4186 * might have
4187 *
 4188 * [sector_num+x, nb_sectors] allocated.
4189 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004190 if (n > pnum_inter &&
4191 (intermediate == top ||
4192 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004193 n = pnum_inter;
4194 }
4195
4196 intermediate = intermediate->backing_hd;
4197 }
4198
4199 *pnum = n;
4200 return 0;
4201}
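
/*
 * Illustrative sketch (hypothetical usage): copy-down code in the style of
 * block-commit can use bdrv_is_allocated_above() to decide whether a range
 * between 'base' and 'top' still needs to be copied.  'copy_range_down' is a
 * placeholder for the caller's own copy routine:
 *
 *     int n;
 *     int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors, &n);
 *     if (ret < 0) {
 *         return ret;
 *     } else if (ret) {
 *         copy_range_down(sector_num, n);   // allocated somewhere above 'base'
 *     }
 *     sector_num += n;
 *     nb_sectors -= n;
 */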
4202
aliguori045df332009-03-05 23:00:48 +00004203const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4204{
4205 if (bs->backing_hd && bs->backing_hd->encrypted)
4206 return bs->backing_file;
4207 else if (bs->encrypted)
4208 return bs->filename;
4209 else
4210 return NULL;
4211}
4212
ths5fafdf22007-09-16 21:08:06 +00004213void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004214 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004215{
Kevin Wolf3574c602011-10-26 11:02:11 +02004216 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004217}
4218
ths5fafdf22007-09-16 21:08:06 +00004219int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004220 const uint8_t *buf, int nb_sectors)
4221{
4222 BlockDriver *drv = bs->drv;
4223 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004224 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004225 if (!drv->bdrv_write_compressed)
4226 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004227 if (bdrv_check_request(bs, sector_num, nb_sectors))
4228 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004229
Fam Zhenge4654d22013-11-13 18:29:43 +08004230 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004231
bellardfaea38e2006-08-05 21:31:00 +00004232 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4233}
ths3b46e622007-09-17 08:09:54 +00004234
bellardfaea38e2006-08-05 21:31:00 +00004235int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4236{
4237 BlockDriver *drv = bs->drv;
4238 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004239 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004240 if (!drv->bdrv_get_info)
4241 return -ENOTSUP;
4242 memset(bdi, 0, sizeof(*bdi));
4243 return drv->bdrv_get_info(bs, bdi);
4244}
4245
Max Reitzeae041f2013-10-09 10:46:16 +02004246ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4247{
4248 BlockDriver *drv = bs->drv;
4249 if (drv && drv->bdrv_get_specific_info) {
4250 return drv->bdrv_get_specific_info(bs);
4251 }
4252 return NULL;
4253}
4254
Christoph Hellwig45566e92009-07-10 23:11:57 +02004255int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4256 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004257{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004258 QEMUIOVector qiov;
4259 struct iovec iov = {
4260 .iov_base = (void *) buf,
4261 .iov_len = size,
4262 };
4263
4264 qemu_iovec_init_external(&qiov, &iov, 1);
4265 return bdrv_writev_vmstate(bs, &qiov, pos);
4266}
4267
4268int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4269{
aliguori178e08a2009-04-05 19:10:55 +00004270 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004271
4272 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004273 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004274 } else if (drv->bdrv_save_vmstate) {
4275 return drv->bdrv_save_vmstate(bs, qiov, pos);
4276 } else if (bs->file) {
4277 return bdrv_writev_vmstate(bs->file, qiov, pos);
4278 }
4279
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004280 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004281}
4282
Christoph Hellwig45566e92009-07-10 23:11:57 +02004283int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4284 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004285{
4286 BlockDriver *drv = bs->drv;
4287 if (!drv)
4288 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004289 if (drv->bdrv_load_vmstate)
4290 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4291 if (bs->file)
4292 return bdrv_load_vmstate(bs->file, buf, pos, size);
4293 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004294}
4295
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004296void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4297{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004298 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004299 return;
4300 }
4301
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004302 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004303}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004304
Kevin Wolf41c695c2012-12-06 14:32:58 +01004305int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4306 const char *tag)
4307{
4308 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4309 bs = bs->file;
4310 }
4311
4312 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4313 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4314 }
4315
4316 return -ENOTSUP;
4317}
4318
Fam Zheng4cc70e92013-11-20 10:01:54 +08004319int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4320{
4321 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4322 bs = bs->file;
4323 }
4324
4325 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4326 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4327 }
4328
4329 return -ENOTSUP;
4330}
4331
Kevin Wolf41c695c2012-12-06 14:32:58 +01004332int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4333{
Max Reitz938789e2014-03-10 23:44:08 +01004334 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004335 bs = bs->file;
4336 }
4337
4338 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4339 return bs->drv->bdrv_debug_resume(bs, tag);
4340 }
4341
4342 return -ENOTSUP;
4343}
4344
4345bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4346{
4347 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4348 bs = bs->file;
4349 }
4350
4351 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4352 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4353 }
4354
4355 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004356}
4357
Blue Swirl199630b2010-07-25 20:49:34 +00004358int bdrv_is_snapshot(BlockDriverState *bs)
4359{
4360 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4361}
4362
Jeff Codyb1b1d782012-10-16 15:49:09 -04004363/* backing_file can either be relative, or absolute, or a protocol. If it is
4364 * relative, it must be relative to the chain. So, passing in bs->filename
4365 * from a BDS as backing_file should not be done, as that may be relative to
4366 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004367BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4368 const char *backing_file)
4369{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004370 char *filename_full = NULL;
4371 char *backing_file_full = NULL;
4372 char *filename_tmp = NULL;
4373 int is_protocol = 0;
4374 BlockDriverState *curr_bs = NULL;
4375 BlockDriverState *retval = NULL;
4376
4377 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004378 return NULL;
4379 }
4380
Jeff Codyb1b1d782012-10-16 15:49:09 -04004381 filename_full = g_malloc(PATH_MAX);
4382 backing_file_full = g_malloc(PATH_MAX);
4383 filename_tmp = g_malloc(PATH_MAX);
4384
4385 is_protocol = path_has_protocol(backing_file);
4386
4387 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4388
4389 /* If either of the filename paths is actually a protocol, then
4390 * compare unmodified paths; otherwise make paths relative */
4391 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4392 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4393 retval = curr_bs->backing_hd;
4394 break;
4395 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004396 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004397 /* If not an absolute filename path, make it relative to the current
4398 * image's filename path */
4399 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4400 backing_file);
4401
4402 /* We are going to compare absolute pathnames */
4403 if (!realpath(filename_tmp, filename_full)) {
4404 continue;
4405 }
4406
4407 /* We need to make sure the backing filename we are comparing against
4408 * is relative to the current image filename (or absolute) */
4409 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4410 curr_bs->backing_file);
4411
4412 if (!realpath(filename_tmp, backing_file_full)) {
4413 continue;
4414 }
4415
4416 if (strcmp(backing_file_full, filename_full) == 0) {
4417 retval = curr_bs->backing_hd;
4418 break;
4419 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004420 }
4421 }
4422
Jeff Codyb1b1d782012-10-16 15:49:09 -04004423 g_free(filename_full);
4424 g_free(backing_file_full);
4425 g_free(filename_tmp);
4426 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004427}
4428
Benoît Canetf198fd12012-08-02 10:22:47 +02004429int bdrv_get_backing_file_depth(BlockDriverState *bs)
4430{
4431 if (!bs->drv) {
4432 return 0;
4433 }
4434
4435 if (!bs->backing_hd) {
4436 return 0;
4437 }
4438
4439 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4440}
4441
bellard83f64092006-08-01 16:21:11 +00004442/**************************************************************/
4443/* async I/Os */
4444
aliguori3b69e4b2009-01-22 16:59:24 +00004445BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004446 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004447 BlockDriverCompletionFunc *cb, void *opaque)
4448{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004449 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4450
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004451 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004452 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004453}
4454
aliguorif141eaf2009-04-07 18:43:24 +00004455BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4456 QEMUIOVector *qiov, int nb_sectors,
4457 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004458{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004459 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4460
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004461 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004462 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004463}
4464
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004465BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4466 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4467 BlockDriverCompletionFunc *cb, void *opaque)
4468{
4469 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4470
4471 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4472 BDRV_REQ_ZERO_WRITE | flags,
4473 cb, opaque, true);
4474}
4475
Kevin Wolf40b4f532009-09-09 17:53:37 +02004476
4477typedef struct MultiwriteCB {
4478 int error;
4479 int num_requests;
4480 int num_callbacks;
4481 struct {
4482 BlockDriverCompletionFunc *cb;
4483 void *opaque;
4484 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004485 } callbacks[];
4486} MultiwriteCB;
4487
4488static void multiwrite_user_cb(MultiwriteCB *mcb)
4489{
4490 int i;
4491
4492 for (i = 0; i < mcb->num_callbacks; i++) {
4493 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004494 if (mcb->callbacks[i].free_qiov) {
4495 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4496 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004497 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004498 }
4499}
4500
4501static void multiwrite_cb(void *opaque, int ret)
4502{
4503 MultiwriteCB *mcb = opaque;
4504
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004505 trace_multiwrite_cb(mcb, ret);
4506
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004507 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004508 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004509 }
4510
4511 mcb->num_requests--;
4512 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004513 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004514 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004515 }
4516}
4517
4518static int multiwrite_req_compare(const void *a, const void *b)
4519{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004520 const BlockRequest *req1 = a, *req2 = b;
4521
4522 /*
4523 * Note that we can't simply subtract req2->sector from req1->sector
4524 * here as that could overflow the return value.
4525 */
4526 if (req1->sector > req2->sector) {
4527 return 1;
4528 } else if (req1->sector < req2->sector) {
4529 return -1;
4530 } else {
4531 return 0;
4532 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004533}
4534
4535/*
4536 * Takes a bunch of requests and tries to merge them. Returns the number of
4537 * requests that remain after merging.
4538 */
4539static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4540 int num_reqs, MultiwriteCB *mcb)
4541{
4542 int i, outidx;
4543
4544 // Sort requests by start sector
4545 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4546
 4547 // Check if adjacent requests are exactly sequential or overlapping. If so,
 4548 // combine them into a single request; no gap filling is needed.
4549 outidx = 0;
4550 for (i = 1; i < num_reqs; i++) {
4551 int merge = 0;
4552 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4553
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004554 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004555 if (reqs[i].sector <= oldreq_last) {
4556 merge = 1;
4557 }
4558
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004559 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4560 merge = 0;
4561 }
4562
Kevin Wolf40b4f532009-09-09 17:53:37 +02004563 if (merge) {
4564 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004565 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004566 qemu_iovec_init(qiov,
4567 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4568
4569 // Add the first request to the merged one. If the requests are
4570 // overlapping, drop the last sectors of the first request.
4571 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004572 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004573
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004574 // We should not need to add any zeros between the two requests
4575 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004576
4577 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004578 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004579
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004580 // Add tail of first request, if necessary
4581 if (qiov->size < reqs[outidx].qiov->size) {
4582 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4583 reqs[outidx].qiov->size - qiov->size);
4584 }
4585
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004586 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004587 reqs[outidx].qiov = qiov;
4588
4589 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4590 } else {
4591 outidx++;
4592 reqs[outidx].sector = reqs[i].sector;
4593 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4594 reqs[outidx].qiov = reqs[i].qiov;
4595 }
4596 }
4597
4598 return outidx + 1;
4599}
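
/*
 * Worked example (hypothetical numbers): after sorting, two requests covering
 * sectors [0, 8) and [4, 12) satisfy reqs[1].sector <= oldreq_last (4 <= 8),
 * so they are merged.  The combined qiov takes the first 4 sectors from the
 * first request and all 8 sectors from the second, giving one 12-sector
 * write; no tail has to be re-appended because the merged qiov already covers
 * the first request completely.
 */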
4600
4601/*
4602 * Submit multiple AIO write requests at once.
4603 *
4604 * On success, the function returns 0 and all requests in the reqs array have
 4605 * been submitted. On error, this function returns -1, and any of the
4606 * requests may or may not be submitted yet. In particular, this means that the
4607 * callback will be called for some of the requests, for others it won't. The
4608 * caller must check the error field of the BlockRequest to wait for the right
4609 * callbacks (if error != 0, no callback will be called).
4610 *
4611 * The implementation may modify the contents of the reqs array, e.g. to merge
4612 * requests. However, the fields opaque and error are left unmodified as they
4613 * are used to signal failure for a single request to the caller.
4614 */
4615int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4616{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004617 MultiwriteCB *mcb;
4618 int i;
4619
Ryan Harper301db7c2011-03-07 10:01:04 -06004620 /* don't submit writes if we don't have a medium */
4621 if (bs->drv == NULL) {
4622 for (i = 0; i < num_reqs; i++) {
4623 reqs[i].error = -ENOMEDIUM;
4624 }
4625 return -1;
4626 }
4627
Kevin Wolf40b4f532009-09-09 17:53:37 +02004628 if (num_reqs == 0) {
4629 return 0;
4630 }
4631
4632 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004633 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004634 mcb->num_requests = 0;
4635 mcb->num_callbacks = num_reqs;
4636
4637 for (i = 0; i < num_reqs; i++) {
4638 mcb->callbacks[i].cb = reqs[i].cb;
4639 mcb->callbacks[i].opaque = reqs[i].opaque;
4640 }
4641
4642 // Check for mergable requests
4643 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4644
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004645 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4646
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004647 /* Run the aio requests. */
4648 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004649 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004650 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4651 reqs[i].nb_sectors, reqs[i].flags,
4652 multiwrite_cb, mcb,
4653 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004654 }
4655
4656 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004657}
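
/*
 * Illustrative sketch (hypothetical usage): a device model batches several
 * guest writes into one bdrv_aio_multiwrite() call so that adjacent requests
 * can be merged.  'write_done', 'req0' and 'req1' are placeholder names for
 * the caller's completion callback and per-request state:
 *
 *     BlockRequest reqs[2] = {
 *         { .sector = 0, .nb_sectors = 8, .qiov = &qiov0,
 *           .cb = write_done, .opaque = req0 },
 *         { .sector = 8, .nb_sectors = 8, .qiov = &qiov1,
 *           .cb = write_done, .opaque = req1 },
 *     };
 *     if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *         // requests with reqs[i].error != 0 will not get a callback
 *     }
 */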
4658
bellard83f64092006-08-01 16:21:11 +00004659void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004660{
Fam Zhengca5fd112014-09-11 13:41:27 +08004661 qemu_aio_ref(acb);
4662 bdrv_aio_cancel_async(acb);
4663 while (acb->refcnt > 1) {
4664 if (acb->aiocb_info->get_aio_context) {
4665 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4666 } else if (acb->bs) {
4667 aio_poll(bdrv_get_aio_context(acb->bs), true);
4668 } else {
4669 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004670 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004671 }
Fam Zheng80074292014-09-11 13:41:28 +08004672 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004673}
4674
4675/* Async version of aio cancel. The caller is not blocked if the acb implements
 4676 * cancel_async; otherwise we do nothing and let the request complete normally.
4677 * In either case the completion callback must be called. */
4678void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
4679{
4680 if (acb->aiocb_info->cancel_async) {
4681 acb->aiocb_info->cancel_async(acb);
4682 }
bellard83f64092006-08-01 16:21:11 +00004683}
4684
4685/**************************************************************/
4686/* async block device emulation */
4687
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004688typedef struct BlockDriverAIOCBSync {
4689 BlockDriverAIOCB common;
4690 QEMUBH *bh;
4691 int ret;
4692 /* vector translation state */
4693 QEMUIOVector *qiov;
4694 uint8_t *bounce;
4695 int is_write;
4696} BlockDriverAIOCBSync;
4697
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004698static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004699 .aiocb_size = sizeof(BlockDriverAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004700};
4701
bellard83f64092006-08-01 16:21:11 +00004702static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004703{
pbrookce1a14d2006-08-07 02:38:06 +00004704 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004705
Kevin Wolf857d4f42014-05-20 13:16:51 +02004706 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004707 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004708 }
aliguoriceb42de2009-04-07 18:43:28 +00004709 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004710 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004711 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004712 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004713 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004714}
bellardbeac80c2006-06-26 20:08:57 +00004715
aliguorif141eaf2009-04-07 18:43:24 +00004716static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4717 int64_t sector_num,
4718 QEMUIOVector *qiov,
4719 int nb_sectors,
4720 BlockDriverCompletionFunc *cb,
4721 void *opaque,
4722 int is_write)
4723
bellardea2384d2004-08-01 21:59:26 +00004724{
pbrookce1a14d2006-08-07 02:38:06 +00004725 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004726
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004727 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004728 acb->is_write = is_write;
4729 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004730 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004731 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004732
Kevin Wolf857d4f42014-05-20 13:16:51 +02004733 if (acb->bounce == NULL) {
4734 acb->ret = -ENOMEM;
4735 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004736 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004737 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004738 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004739 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004740 }
4741
pbrookce1a14d2006-08-07 02:38:06 +00004742 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004743
pbrookce1a14d2006-08-07 02:38:06 +00004744 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004745}
4746
aliguorif141eaf2009-04-07 18:43:24 +00004747static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4748 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004749 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004750{
aliguorif141eaf2009-04-07 18:43:24 +00004751 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004752}
4753
aliguorif141eaf2009-04-07 18:43:24 +00004754static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4755 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4756 BlockDriverCompletionFunc *cb, void *opaque)
4757{
4758 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4759}
4760
Kevin Wolf68485422011-06-30 10:05:46 +02004761
4762typedef struct BlockDriverAIOCBCoroutine {
4763 BlockDriverAIOCB common;
4764 BlockRequest req;
4765 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004766 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004767 QEMUBH* bh;
4768} BlockDriverAIOCBCoroutine;
4769
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004770static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004771 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004772};
4773
Paolo Bonzini35246a62011-10-14 10:41:29 +02004774static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004775{
4776 BlockDriverAIOCBCoroutine *acb = opaque;
4777
4778 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004779
Kevin Wolf68485422011-06-30 10:05:46 +02004780 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004781 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004782}
4783
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004784/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4785static void coroutine_fn bdrv_co_do_rw(void *opaque)
4786{
4787 BlockDriverAIOCBCoroutine *acb = opaque;
4788 BlockDriverState *bs = acb->common.bs;
4789
4790 if (!acb->is_write) {
4791 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004792 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004793 } else {
4794 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004795 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004796 }
4797
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004798 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004799 qemu_bh_schedule(acb->bh);
4800}
4801
Kevin Wolf68485422011-06-30 10:05:46 +02004802static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4803 int64_t sector_num,
4804 QEMUIOVector *qiov,
4805 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004806 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004807 BlockDriverCompletionFunc *cb,
4808 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004809 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004810{
4811 Coroutine *co;
4812 BlockDriverAIOCBCoroutine *acb;
4813
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004814 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004815 acb->req.sector = sector_num;
4816 acb->req.nb_sectors = nb_sectors;
4817 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004818 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004819 acb->is_write = is_write;
4820
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004821 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004822 qemu_coroutine_enter(co, acb);
4823
4824 return &acb->common;
4825}
4826
Paolo Bonzini07f07612011-10-17 12:32:12 +02004827static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004828{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004829 BlockDriverAIOCBCoroutine *acb = opaque;
4830 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004831
Paolo Bonzini07f07612011-10-17 12:32:12 +02004832 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004833 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004834 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004835}
4836
Paolo Bonzini07f07612011-10-17 12:32:12 +02004837BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004838 BlockDriverCompletionFunc *cb, void *opaque)
4839{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004840 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004841
Paolo Bonzini07f07612011-10-17 12:32:12 +02004842 Coroutine *co;
4843 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004844
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004845 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004846
Paolo Bonzini07f07612011-10-17 12:32:12 +02004847 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4848 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004849
Alexander Graf016f5cf2010-05-26 17:51:49 +02004850 return &acb->common;
4851}
4852
Paolo Bonzini4265d622011-10-17 12:32:14 +02004853static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4854{
4855 BlockDriverAIOCBCoroutine *acb = opaque;
4856 BlockDriverState *bs = acb->common.bs;
4857
4858 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004859 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004860 qemu_bh_schedule(acb->bh);
4861}
4862
4863BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4864 int64_t sector_num, int nb_sectors,
4865 BlockDriverCompletionFunc *cb, void *opaque)
4866{
4867 Coroutine *co;
4868 BlockDriverAIOCBCoroutine *acb;
4869
4870 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4871
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004872 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004873 acb->req.sector = sector_num;
4874 acb->req.nb_sectors = nb_sectors;
4875 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4876 qemu_coroutine_enter(co, acb);
4877
4878 return &acb->common;
4879}
4880
bellardea2384d2004-08-01 21:59:26 +00004881void bdrv_init(void)
4882{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004883 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004884}
pbrookce1a14d2006-08-07 02:38:06 +00004885
Markus Armbrustereb852012009-10-27 18:41:44 +01004886void bdrv_init_with_whitelist(void)
4887{
4888 use_bdrv_whitelist = 1;
4889 bdrv_init();
4890}
4891
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004892void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004893 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004894{
pbrookce1a14d2006-08-07 02:38:06 +00004895 BlockDriverAIOCB *acb;
4896
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004897 acb = g_slice_alloc(aiocb_info->aiocb_size);
4898 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004899 acb->bs = bs;
4900 acb->cb = cb;
4901 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004902 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004903 return acb;
4904}
4905
Fam Zhengf197fe22014-09-11 13:41:08 +08004906void qemu_aio_ref(void *p)
4907{
4908 BlockDriverAIOCB *acb = p;
4909 acb->refcnt++;
4910}
4911
Fam Zheng80074292014-09-11 13:41:28 +08004912void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004913{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004914 BlockDriverAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004915 assert(acb->refcnt > 0);
4916 if (--acb->refcnt == 0) {
4917 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4918 }
pbrookce1a14d2006-08-07 02:38:06 +00004919}
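
/*
 * Illustrative sketch (hypothetical type and names): a driver defines an
 * AIOCBInfo for its private AIOCB type, allocates instances with
 * qemu_aio_get(), and drops the initial reference with qemu_aio_unref() once
 * the completion callback has run, mirroring bdrv_em_aiocb_info above:
 *
 *     typedef struct MyAIOCB {
 *         BlockDriverAIOCB common;
 *         int ret;
 *     } MyAIOCB;
 *
 *     static const AIOCBInfo my_aiocb_info = {
 *         .aiocb_size = sizeof(MyAIOCB),
 *     };
 *
 *     MyAIOCB *acb = qemu_aio_get(&my_aiocb_info, bs, cb, opaque);
 *     ...
 *     acb->common.cb(acb->common.opaque, acb->ret);
 *     qemu_aio_unref(acb);
 */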
bellard19cb3732006-08-19 11:45:59 +00004920
4921/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004922/* Coroutine block device emulation */
4923
4924typedef struct CoroutineIOCompletion {
4925 Coroutine *coroutine;
4926 int ret;
4927} CoroutineIOCompletion;
4928
4929static void bdrv_co_io_em_complete(void *opaque, int ret)
4930{
4931 CoroutineIOCompletion *co = opaque;
4932
4933 co->ret = ret;
4934 qemu_coroutine_enter(co->coroutine, NULL);
4935}
4936
4937static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4938 int nb_sectors, QEMUIOVector *iov,
4939 bool is_write)
4940{
4941 CoroutineIOCompletion co = {
4942 .coroutine = qemu_coroutine_self(),
4943 };
4944 BlockDriverAIOCB *acb;
4945
4946 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004947 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4948 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004949 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004950 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4951 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004952 }
4953
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004954 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004955 if (!acb) {
4956 return -EIO;
4957 }
4958 qemu_coroutine_yield();
4959
4960 return co.ret;
4961}
4962
4963static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4964 int64_t sector_num, int nb_sectors,
4965 QEMUIOVector *iov)
4966{
4967 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4968}
4969
4970static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4971 int64_t sector_num, int nb_sectors,
4972 QEMUIOVector *iov)
4973{
4974 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4975}
4976
Paolo Bonzini07f07612011-10-17 12:32:12 +02004977static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004978{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004979 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004980
Paolo Bonzini07f07612011-10-17 12:32:12 +02004981 rwco->ret = bdrv_co_flush(rwco->bs);
4982}
4983
4984int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4985{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004986 int ret;
4987
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004988 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004989 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004990 }
4991
Kevin Wolfca716362011-11-10 18:13:59 +01004992 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004993 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004994 if (bs->drv->bdrv_co_flush_to_os) {
4995 ret = bs->drv->bdrv_co_flush_to_os(bs);
4996 if (ret < 0) {
4997 return ret;
4998 }
4999 }
5000
Kevin Wolfca716362011-11-10 18:13:59 +01005001 /* But don't actually force it to the disk with cache=unsafe */
5002 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02005003 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01005004 }
5005
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005006 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005007 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005008 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005009 } else if (bs->drv->bdrv_aio_flush) {
5010 BlockDriverAIOCB *acb;
5011 CoroutineIOCompletion co = {
5012 .coroutine = qemu_coroutine_self(),
5013 };
5014
5015 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5016 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005017 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005018 } else {
5019 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005020 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005021 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02005022 } else {
5023 /*
5024 * Some block drivers always operate in either writethrough or unsafe
 5025 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5026 * know how the server works (because the behaviour is hardcoded or
5027 * depends on server-side configuration), so we can't ensure that
5028 * everything is safe on disk. Returning an error doesn't work because
5029 * that would break guests even if the server operates in writethrough
5030 * mode.
5031 *
5032 * Let's hope the user knows what he's doing.
5033 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005034 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005035 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005036 if (ret < 0) {
5037 return ret;
5038 }
5039
5040 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5041 * in the case of cache=unsafe, so there are no useless flushes.
5042 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02005043flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005044 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005045}
5046
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005047void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005048{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005049 Error *local_err = NULL;
5050 int ret;
5051
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005052 if (!bs->drv) {
5053 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005054 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005055
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005056 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5057 return;
5058 }
5059 bs->open_flags &= ~BDRV_O_INCOMING;
5060
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005061 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005062 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005063 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005064 bdrv_invalidate_cache(bs->file, &local_err);
5065 }
5066 if (local_err) {
5067 error_propagate(errp, local_err);
5068 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005069 }
5070
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005071 ret = refresh_total_sectors(bs, bs->total_sectors);
5072 if (ret < 0) {
5073 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5074 return;
5075 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005076}
5077
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005078void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005079{
5080 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005081 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005082
Benoît Canetdc364f42014-01-23 21:31:32 +01005083 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005084 AioContext *aio_context = bdrv_get_aio_context(bs);
5085
5086 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005087 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005088 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005089 if (local_err) {
5090 error_propagate(errp, local_err);
5091 return;
5092 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005093 }
5094}
5095
Paolo Bonzini07f07612011-10-17 12:32:12 +02005096int bdrv_flush(BlockDriverState *bs)
5097{
5098 Coroutine *co;
5099 RwCo rwco = {
5100 .bs = bs,
5101 .ret = NOT_DONE,
5102 };
5103
5104 if (qemu_in_coroutine()) {
5105 /* Fast-path if already in coroutine context */
5106 bdrv_flush_co_entry(&rwco);
5107 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005108 AioContext *aio_context = bdrv_get_aio_context(bs);
5109
Paolo Bonzini07f07612011-10-17 12:32:12 +02005110 co = qemu_coroutine_create(bdrv_flush_co_entry);
5111 qemu_coroutine_enter(co, &rwco);
5112 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005113 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005114 }
5115 }
5116
5117 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005118}
5119
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005120typedef struct DiscardCo {
5121 BlockDriverState *bs;
5122 int64_t sector_num;
5123 int nb_sectors;
5124 int ret;
5125} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005126static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5127{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005128 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005129
5130 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5131}
5132
Peter Lieven6f14da52013-10-24 12:06:59 +02005133/* if no limit is specified in the BlockLimits use a default
5134 * of 32768 512-byte sectors (16 MiB) per request.
5135 */
5136#define MAX_DISCARD_DEFAULT 32768
5137
Paolo Bonzini4265d622011-10-17 12:32:14 +02005138int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5139 int nb_sectors)
5140{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005141 int max_discard;
5142
Paolo Bonzini4265d622011-10-17 12:32:14 +02005143 if (!bs->drv) {
5144 return -ENOMEDIUM;
5145 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5146 return -EIO;
5147 } else if (bs->read_only) {
5148 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005149 }
5150
Fam Zhenge4654d22013-11-13 18:29:43 +08005151 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005152
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005153 /* Do nothing if disabled. */
5154 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5155 return 0;
5156 }
5157
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005158 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005159 return 0;
5160 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005161
5162 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5163 while (nb_sectors > 0) {
5164 int ret;
5165 int num = nb_sectors;
5166
5167 /* align request */
5168 if (bs->bl.discard_alignment &&
5169 num >= bs->bl.discard_alignment &&
5170 sector_num % bs->bl.discard_alignment) {
5171 if (num > bs->bl.discard_alignment) {
5172 num = bs->bl.discard_alignment;
5173 }
5174 num -= sector_num % bs->bl.discard_alignment;
5175 }
5176
5177 /* limit request size */
5178 if (num > max_discard) {
5179 num = max_discard;
5180 }
5181
5182 if (bs->drv->bdrv_co_discard) {
5183 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5184 } else {
5185 BlockDriverAIOCB *acb;
5186 CoroutineIOCompletion co = {
5187 .coroutine = qemu_coroutine_self(),
5188 };
5189
5190 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5191 bdrv_co_io_em_complete, &co);
5192 if (acb == NULL) {
5193 return -EIO;
5194 } else {
5195 qemu_coroutine_yield();
5196 ret = co.ret;
5197 }
5198 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005199 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005200 return ret;
5201 }
5202
5203 sector_num += num;
5204 nb_sectors -= num;
5205 }
5206 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005207}
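
/*
 * Worked example (hypothetical numbers): with bl.discard_alignment = 128 and
 * a request for sectors [100, 400), the loop above first issues the 28
 * sectors [100, 128) to realign the request, and then the remaining 272
 * sectors [128, 400) in a single call, since that stays below the
 * 32768-sector default limit.
 */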
5208
5209int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5210{
5211 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005212 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005213 .bs = bs,
5214 .sector_num = sector_num,
5215 .nb_sectors = nb_sectors,
5216 .ret = NOT_DONE,
5217 };
5218
5219 if (qemu_in_coroutine()) {
5220 /* Fast-path if already in coroutine context */
5221 bdrv_discard_co_entry(&rwco);
5222 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005223 AioContext *aio_context = bdrv_get_aio_context(bs);
5224
Paolo Bonzini4265d622011-10-17 12:32:14 +02005225 co = qemu_coroutine_create(bdrv_discard_co_entry);
5226 qemu_coroutine_enter(co, &rwco);
5227 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005228 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005229 }
5230 }
5231
5232 return rwco.ret;
5233}
5234
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005235/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005236/* removable device support */
5237
5238/**
5239 * Return TRUE if the media is present
5240 */
5241int bdrv_is_inserted(BlockDriverState *bs)
5242{
5243 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005244
bellard19cb3732006-08-19 11:45:59 +00005245 if (!drv)
5246 return 0;
5247 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005248 return 1;
5249 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005250}
5251
5252/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005253 * Return whether the media changed since the last call to this
5254 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005255 */
5256int bdrv_media_changed(BlockDriverState *bs)
5257{
5258 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005259
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005260 if (drv && drv->bdrv_media_changed) {
5261 return drv->bdrv_media_changed(bs);
5262 }
5263 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005264}
5265
5266/**
5267 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5268 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005269void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005270{
5271 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005272
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005273 if (drv && drv->bdrv_eject) {
5274 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005275 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005276
5277 if (bs->device_name[0] != '\0') {
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005278 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5279 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005280 }
bellard19cb3732006-08-19 11:45:59 +00005281}
5282
bellard19cb3732006-08-19 11:45:59 +00005283/**
5284 * Lock or unlock the media (if it is locked, the user won't be able
5285 * to eject it manually).
5286 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005287void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005288{
5289 BlockDriver *drv = bs->drv;
5290
Markus Armbruster025e8492011-09-06 18:58:47 +02005291 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005292
Markus Armbruster025e8492011-09-06 18:58:47 +02005293 if (drv && drv->bdrv_lock_medium) {
5294 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005295 }
5296}
ths985a03b2007-12-24 16:10:43 +00005297
5298/* needed for generic scsi interface */
5299
5300int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5301{
5302 BlockDriver *drv = bs->drv;
5303
5304 if (drv && drv->bdrv_ioctl)
5305 return drv->bdrv_ioctl(bs, req, buf);
5306 return -ENOTSUP;
5307}
aliguori7d780662009-03-12 19:57:08 +00005308
aliguori221f7152009-03-28 17:28:41 +00005309BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5310 unsigned long int req, void *buf,
5311 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005312{
aliguori221f7152009-03-28 17:28:41 +00005313 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005314
aliguori221f7152009-03-28 17:28:41 +00005315 if (drv && drv->bdrv_aio_ioctl)
5316 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5317 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005318}
aliguorie268ca52009-04-22 20:20:00 +00005319
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005320void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005321{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005322 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005323}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005324
aliguorie268ca52009-04-22 20:20:00 +00005325void *qemu_blockalign(BlockDriverState *bs, size_t size)
5326{
Kevin Wolf339064d2013-11-28 10:23:32 +01005327 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005328}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005329
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005330void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5331{
5332 size_t align = bdrv_opt_mem_align(bs);
5333
5334 /* Ensure that NULL is never returned on success */
5335 assert(align > 0);
5336 if (size == 0) {
5337 size = align;
5338 }
5339
5340 return qemu_try_memalign(align, size);
5341}
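
/* Usage sketch (not part of the original file): unlike qemu_blockalign(),
 * which aborts on allocation failure, qemu_try_blockalign() returns NULL, so
 * callers can degrade gracefully and report -ENOMEM. The helper name is
 * hypothetical.
 */
static int example_alloc_bounce_buffer(BlockDriverState *bs, size_t size,
                                       void **out_buf)
{
    void *buf = qemu_try_blockalign(bs, size);

    if (buf == NULL) {
        return -ENOMEM;
    }
    memset(buf, 0, size);
    *out_buf = buf;             /* to be released with qemu_vfree() */
    return 0;
}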
5342
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005343/*
5344 * Check if all memory in this vector is aligned to bdrv_opt_mem_align(bs).
5345 */
5346bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5347{
5348 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005349 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005350
5351 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005352 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005353 return false;
5354 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005355 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005356 return false;
5357 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005358 }
5359
5360 return true;
5361}
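
/* Usage sketch (not part of the original file): protocol drivers that use
 * O_DIRECT-style I/O call bdrv_qiov_is_aligned() to decide whether a request
 * can be submitted as-is or must first be copied into an aligned bounce
 * buffer. The helper below wraps a single buffer in a QEMUIOVector; its name
 * is hypothetical.
 */
static bool example_can_submit_directly(BlockDriverState *bs,
                                        void *buf, size_t len)
{
    struct iovec iov = {
        .iov_base = buf,
        .iov_len  = len,
    };
    QEMUIOVector qiov;

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_qiov_is_aligned(bs, &qiov);
}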
5362
Fam Zhengb8afb522014-04-16 09:34:30 +08005363BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5364 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005365{
5366 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005367 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005368
Paolo Bonzini50717e92013-01-21 17:09:45 +01005369 assert((granularity & (granularity - 1)) == 0);
5370
Fam Zhenge4654d22013-11-13 18:29:43 +08005371 granularity >>= BDRV_SECTOR_BITS;
5372 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005373 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005374 if (bitmap_size < 0) {
5375 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5376 errno = -bitmap_size;
5377 return NULL;
5378 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005379 bitmap = g_new0(BdrvDirtyBitmap, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +08005380 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5381 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5382 return bitmap;
5383}
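
/* Usage sketch (not part of the original file): callers such as drive-backup
 * create a dirty bitmap with a power-of-two granularity given in bytes and
 * must handle failure, which occurs when the device length is unknown. The
 * function name is hypothetical.
 */
static BdrvDirtyBitmap *example_start_dirty_tracking(BlockDriverState *bs,
                                                     Error **errp)
{
    Error *local_err = NULL;
    BdrvDirtyBitmap *bitmap;

    /* 64 KiB granularity: one bitmap bit covers 128 sectors */
    bitmap = bdrv_create_dirty_bitmap(bs, 65536, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return NULL;
    }
    return bitmap;
}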
5384
5385void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5386{
5387 BdrvDirtyBitmap *bm, *next;
5388 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5389 if (bm == bitmap) {
5390 QLIST_REMOVE(bitmap, list);
5391 hbitmap_free(bitmap->bitmap);
5392 g_free(bitmap);
5393 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005394 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005395 }
5396}
5397
Fam Zheng21b56832013-11-13 18:29:44 +08005398BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5399{
5400 BdrvDirtyBitmap *bm;
5401 BlockDirtyInfoList *list = NULL;
5402 BlockDirtyInfoList **plist = &list;
5403
5404 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005405 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5406 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005407 info->count = bdrv_get_dirty_count(bs, bm);
5408 info->granularity =
5409 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5410 entry->value = info;
5411 *plist = entry;
5412 plist = &entry->next;
5413 }
5414
5415 return list;
5416}
5417
Fam Zhenge4654d22013-11-13 18:29:43 +08005418int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005419{
Fam Zhenge4654d22013-11-13 18:29:43 +08005420 if (bitmap) {
5421 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005422 } else {
5423 return 0;
5424 }
5425}
5426
Fam Zhenge4654d22013-11-13 18:29:43 +08005427void bdrv_dirty_iter_init(BlockDriverState *bs,
5428 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005429{
Fam Zhenge4654d22013-11-13 18:29:43 +08005430 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005431}
5432
5433void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5434 int nr_sectors)
5435{
Fam Zhenge4654d22013-11-13 18:29:43 +08005436 BdrvDirtyBitmap *bitmap;
5437 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5438 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005439 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005440}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005441
Fam Zhenge4654d22013-11-13 18:29:43 +08005442void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5443{
5444 BdrvDirtyBitmap *bitmap;
5445 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5446 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5447 }
5448}
5449
5450int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5451{
5452 return hbitmap_count(bitmap->bitmap);
5453}
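
/* Usage sketch (not part of the original file): block jobs walk a dirty
 * bitmap by initialising an HBitmapIter and calling hbitmap_iter_next()
 * (from qemu/hbitmap.h) until it returns a negative value; each returned
 * value is the first sector of a dirty chunk. The function name is
 * hypothetical.
 */
static int64_t example_count_dirty_chunks(BlockDriverState *bs,
                                          BdrvDirtyBitmap *bitmap)
{
    HBitmapIter hbi;
    int64_t sector;
    int64_t chunks = 0;

    bdrv_dirty_iter_init(bs, bitmap, &hbi);
    while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
        chunks++;
    }
    return chunks;
}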
5454
Fam Zheng9fcb0252013-08-23 09:14:46 +08005455/* Get a reference to bs */
5456void bdrv_ref(BlockDriverState *bs)
5457{
5458 bs->refcnt++;
5459}
5460
5461/* Release a previously grabbed reference to bs.
5462 * If, after releasing, the reference count drops to zero, the BlockDriverState
5463 * is deleted. */
5464void bdrv_unref(BlockDriverState *bs)
5465{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005466 if (!bs) {
5467 return;
5468 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005469 assert(bs->refcnt > 0);
5470 if (--bs->refcnt == 0) {
5471 bdrv_delete(bs);
5472 }
5473}
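
/* Usage sketch (not part of the original file): code that must keep a
 * BlockDriverState alive across an operation that may drop other references
 * brackets the work with bdrv_ref()/bdrv_unref(); passing NULL to
 * bdrv_unref() is a no-op, which simplifies error paths. The function name
 * is hypothetical.
 */
static void example_pin_bs_during_work(BlockDriverState *bs)
{
    bdrv_ref(bs);
    /* ... operation that might detach or delete other users of bs ... */
    bdrv_unref(bs);    /* may delete bs if this was the last reference */
}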
5474
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005475struct BdrvOpBlocker {
5476 Error *reason;
5477 QLIST_ENTRY(BdrvOpBlocker) list;
5478};
5479
5480bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5481{
5482 BdrvOpBlocker *blocker;
5483 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5484 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5485 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5486 if (errp) {
5487 error_setg(errp, "Device '%s' is busy: %s",
5488 bs->device_name, error_get_pretty(blocker->reason));
5489 }
5490 return true;
5491 }
5492 return false;
5493}
5494
5495void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5496{
5497 BdrvOpBlocker *blocker;
5498 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5499
Markus Armbruster5839e532014-08-19 10:31:08 +02005500 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005501 blocker->reason = reason;
5502 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5503}
5504
5505void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5506{
5507 BdrvOpBlocker *blocker, *next;
5508 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5509 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5510 if (blocker->reason == reason) {
5511 QLIST_REMOVE(blocker, list);
5512 g_free(blocker);
5513 }
5514 }
5515}
5516
5517void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5518{
5519 int i;
5520 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5521 bdrv_op_block(bs, i, reason);
5522 }
5523}
5524
5525void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5526{
5527 int i;
5528 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5529 bdrv_op_unblock(bs, i, reason);
5530 }
5531}
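
/* Usage sketch (not part of the original file): a block job blocks all
 * operation categories on its BDS with one Error object as the shared
 * reason, and later unblocks them with the same pointer; other code checks
 * bdrv_op_is_blocked() before starting a conflicting operation. The function
 * names are hypothetical.
 */
static Error *example_block_for_job(BlockDriverState *bs)
{
    Error *blocker = NULL;

    error_setg(&blocker, "block device is in use by an example job");
    bdrv_op_block_all(bs, blocker);
    return blocker;                 /* keep it around for unblocking */
}

static void example_unblock_after_job(BlockDriverState *bs, Error *blocker)
{
    bdrv_op_unblock_all(bs, blocker);
    error_free(blocker);
}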
5532
5533bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5534{
5535 int i;
5536
5537 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5538 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5539 return false;
5540 }
5541 }
5542 return true;
5543}
5544
Luiz Capitulino28a72822011-09-26 17:43:50 -03005545void bdrv_iostatus_enable(BlockDriverState *bs)
5546{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005547 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005548 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005549}
5550
5551/* The I/O status is only enabled if the drive explicitly
5552 * enables it _and_ the VM is configured to stop on errors */
5553bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5554{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005555 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005556 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5557 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5558 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005559}
5560
5561void bdrv_iostatus_disable(BlockDriverState *bs)
5562{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005563 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005564}
5565
5566void bdrv_iostatus_reset(BlockDriverState *bs)
5567{
5568 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005569 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005570 if (bs->job) {
5571 block_job_iostatus_reset(bs->job);
5572 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005573 }
5574}
5575
Luiz Capitulino28a72822011-09-26 17:43:50 -03005576void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5577{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005578 assert(bdrv_iostatus_is_enabled(bs));
5579 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005580 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5581 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005582 }
5583}
5584
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005585void bdrv_img_create(const char *filename, const char *fmt,
5586 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005587 char *options, uint64_t img_size, int flags,
5588 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005589{
Chunyan Liu83d05212014-06-05 17:20:51 +08005590 QemuOptsList *create_opts = NULL;
5591 QemuOpts *opts = NULL;
5592 const char *backing_fmt, *backing_file;
5593 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005594 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005595 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005596 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005597 int ret = 0;
5598
5599 /* Find driver and parse its options */
5600 drv = bdrv_find_format(fmt);
5601 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005602 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005603 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005604 }
5605
Kevin Wolf98289622013-07-10 15:47:39 +02005606 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005607 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005608 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005609 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005610 }
5611
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005612 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5613 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005614
5615 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005616 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5617 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005618
5619 /* Parse -o options */
5620 if (options) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005621 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5622 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005623 goto out;
5624 }
5625 }
5626
5627 if (base_filename) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005628 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005629 error_setg(errp, "Backing file not supported for file format '%s'",
5630 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005631 goto out;
5632 }
5633 }
5634
5635 if (base_fmt) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005636 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005637 error_setg(errp, "Backing file format not supported for file "
5638 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005639 goto out;
5640 }
5641 }
5642
Chunyan Liu83d05212014-06-05 17:20:51 +08005643 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5644 if (backing_file) {
5645 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005646 error_setg(errp, "Error: Trying to create an image with the "
5647 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005648 goto out;
5649 }
5650 }
5651
Chunyan Liu83d05212014-06-05 17:20:51 +08005652 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5653 if (backing_fmt) {
5654 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005655 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005656 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005657 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005658 goto out;
5659 }
5660 }
5661
5662 /* The size for the image must always be specified, with one exception:
5663 * if we are using a backing file, we can obtain the size from there. */
Chunyan Liu83d05212014-06-05 17:20:51 +08005664 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5665 if (size == -1) {
5666 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005667 BlockDriverState *bs;
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005668 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005669 int back_flags;
5670
5671 /* backing files always opened read-only */
5672 back_flags =
5673 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005674
Max Reitzf67503e2014-02-18 18:33:05 +01005675 bs = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005676 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005677 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005678 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005679 error_setg_errno(errp, -ret, "Could not open '%s': %s",
Chunyan Liu83d05212014-06-05 17:20:51 +08005680 backing_file,
Max Reitzcc84d902013-09-06 17:14:26 +02005681 error_get_pretty(local_err));
5682 error_free(local_err);
5683 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005684 goto out;
5685 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005686 size = bdrv_getlength(bs);
5687 if (size < 0) {
5688 error_setg_errno(errp, -size, "Could not get size of '%s'",
5689 backing_file);
5690 bdrv_unref(bs);
5691 goto out;
5692 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005693
Chunyan Liu83d05212014-06-05 17:20:51 +08005694 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
Max Reitz66f6b812013-12-03 14:57:52 +01005695
5696 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005697 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005698 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005699 goto out;
5700 }
5701 }
5702
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005703 if (!quiet) {
5704 printf("Formatting '%s', fmt=%s ", filename, fmt);
Chunyan Liu83d05212014-06-05 17:20:51 +08005705 qemu_opts_print(opts);
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005706 puts("");
5707 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005708
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005709 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005710
Max Reitzcc84d902013-09-06 17:14:26 +02005711 if (ret == -EFBIG) {
5712 /* This is generally a better message than whatever the driver would
5713 * deliver (especially because of the cluster_size_hint), since that
5714 * is most probably not much different from "image too large". */
5715 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005716 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005717 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005718 }
Max Reitzcc84d902013-09-06 17:14:26 +02005719 error_setg(errp, "The image size is too large for file format '%s'"
5720 "%s", fmt, cluster_size_hint);
5721 error_free(local_err);
5722 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005723 }
5724
5725out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005726 qemu_opts_del(opts);
5727 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005728 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005729 error_propagate(errp, local_err);
5730 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005731}
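
/* Usage sketch (not part of the original file): a qemu-img style caller
 * creating a 1 GiB qcow2 image with no backing file and no extra options.
 * The function name is hypothetical; errors are reported through @errp.
 */
static void example_create_qcow2(Error **errp)
{
    bdrv_img_create("/tmp/example.qcow2", "qcow2",
                    NULL, NULL, NULL,          /* no backing file, no -o */
                    1024 * 1024 * 1024,        /* 1 GiB */
                    0,                         /* default open flags */
                    errp, true);               /* quiet */
}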
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005732
5733AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5734{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005735 return bs->aio_context;
5736}
5737
5738void bdrv_detach_aio_context(BlockDriverState *bs)
5739{
Max Reitz33384422014-06-20 21:57:33 +02005740 BdrvAioNotifier *baf;
5741
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005742 if (!bs->drv) {
5743 return;
5744 }
5745
Max Reitz33384422014-06-20 21:57:33 +02005746 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5747 baf->detach_aio_context(baf->opaque);
5748 }
5749
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005750 if (bs->io_limits_enabled) {
5751 throttle_detach_aio_context(&bs->throttle_state);
5752 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005753 if (bs->drv->bdrv_detach_aio_context) {
5754 bs->drv->bdrv_detach_aio_context(bs);
5755 }
5756 if (bs->file) {
5757 bdrv_detach_aio_context(bs->file);
5758 }
5759 if (bs->backing_hd) {
5760 bdrv_detach_aio_context(bs->backing_hd);
5761 }
5762
5763 bs->aio_context = NULL;
5764}
5765
5766void bdrv_attach_aio_context(BlockDriverState *bs,
5767 AioContext *new_context)
5768{
Max Reitz33384422014-06-20 21:57:33 +02005769 BdrvAioNotifier *ban;
5770
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005771 if (!bs->drv) {
5772 return;
5773 }
5774
5775 bs->aio_context = new_context;
5776
5777 if (bs->backing_hd) {
5778 bdrv_attach_aio_context(bs->backing_hd, new_context);
5779 }
5780 if (bs->file) {
5781 bdrv_attach_aio_context(bs->file, new_context);
5782 }
5783 if (bs->drv->bdrv_attach_aio_context) {
5784 bs->drv->bdrv_attach_aio_context(bs, new_context);
5785 }
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005786 if (bs->io_limits_enabled) {
5787 throttle_attach_aio_context(&bs->throttle_state, new_context);
5788 }
Max Reitz33384422014-06-20 21:57:33 +02005789
5790 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5791 ban->attached_aio_context(new_context, ban->opaque);
5792 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005793}
5794
5795void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5796{
5797 bdrv_drain_all(); /* ensure there are no in-flight requests */
5798
5799 bdrv_detach_aio_context(bs);
5800
5801 /* This function executes in the old AioContext so acquire the new one in
5802 * case it runs in a different thread.
5803 */
5804 aio_context_acquire(new_context);
5805 bdrv_attach_aio_context(bs, new_context);
5806 aio_context_release(new_context);
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005807}
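
/* Usage sketch (not part of the original file): dataplane-style code moves a
 * BDS to an IOThread's AioContext with bdrv_set_aio_context() (which drains
 * in-flight requests first) and afterwards submits I/O with that context
 * acquired. The function name is hypothetical.
 */
static void example_move_bs_to_context(BlockDriverState *bs,
                                       AioContext *new_context)
{
    bdrv_set_aio_context(bs, new_context);

    aio_context_acquire(bdrv_get_aio_context(bs));
    /* ... submit requests against bs here ... */
    aio_context_release(bdrv_get_aio_context(bs));
}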
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005808
Max Reitz33384422014-06-20 21:57:33 +02005809void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5810 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5811 void (*detach_aio_context)(void *opaque), void *opaque)
5812{
5813 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5814 *ban = (BdrvAioNotifier){
5815 .attached_aio_context = attached_aio_context,
5816 .detach_aio_context = detach_aio_context,
5817 .opaque = opaque
5818 };
5819
5820 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5821}
5822
5823void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5824 void (*attached_aio_context)(AioContext *,
5825 void *),
5826 void (*detach_aio_context)(void *),
5827 void *opaque)
5828{
5829 BdrvAioNotifier *ban, *ban_next;
5830
5831 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5832 if (ban->attached_aio_context == attached_aio_context &&
5833 ban->detach_aio_context == detach_aio_context &&
5834 ban->opaque == opaque)
5835 {
5836 QLIST_REMOVE(ban, list);
5837 g_free(ban);
5838
5839 return;
5840 }
5841 }
5842
5843 abort();
5844}
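
/* Usage sketch (not part of the original file): a component with
 * per-AioContext resources registers a matched pair of callbacks so it can
 * tear the resources down before a context switch and rebuild them after.
 * The struct and function names are hypothetical.
 */
typedef struct ExampleNotifierState {
    AioContext *ctx;    /* context our timers/bottom halves currently use */
} ExampleNotifierState;

static void example_attached_aio_context(AioContext *new_context, void *opaque)
{
    ExampleNotifierState *s = opaque;

    s->ctx = new_context;    /* re-create per-context resources here */
}

static void example_detach_aio_context(void *opaque)
{
    ExampleNotifierState *s = opaque;

    s->ctx = NULL;           /* destroy per-context resources here */
}

/* Registration and removal use the same triple of callbacks and opaque:
 *     bdrv_add_aio_context_notifier(bs, example_attached_aio_context,
 *                                   example_detach_aio_context, s);
 *     bdrv_remove_aio_context_notifier(bs, example_attached_aio_context,
 *                                      example_detach_aio_context, s);
 */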
5845
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005846void bdrv_add_before_write_notifier(BlockDriverState *bs,
5847 NotifierWithReturn *notifier)
5848{
5849 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5850}
Max Reitz6f176b42013-09-03 10:09:50 +02005851
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005852int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
Max Reitz6f176b42013-09-03 10:09:50 +02005853{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005854 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005855 return -ENOTSUP;
5856 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005857 return bs->drv->bdrv_amend_options(bs, opts);
Max Reitz6f176b42013-09-03 10:09:50 +02005858}
Benoît Canetf6186f42013-10-02 14:33:48 +02005859
Benoît Canetb5042a32014-03-03 19:11:34 +01005860/* This function is called by the bdrv_recurse_is_first_non_filter method
5861 * of block filters and by bdrv_is_first_non_filter.
5862 * It is used to test whether the given bs is the candidate, or to recurse
5863 * further into the node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005864 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005865bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5866 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005867{
Benoît Canetb5042a32014-03-03 19:11:34 +01005868 /* return false if basic checks fails */
5869 if (!bs || !bs->drv) {
5870 return false;
5871 }
5872
5873 /* the code reached a non block filter driver -> check if the bs is
5874 * the same as the candidate. It's the recursion termination condition.
5875 */
5876 if (!bs->drv->is_filter) {
5877 return bs == candidate;
5878 }
5879 /* Down this path the driver is a block filter driver */
5880
5881 /* If the block filter recursion method is defined use it to recurse down
5882 * the node graph.
5883 */
5884 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01005885 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5886 }
5887
Benoît Canetb5042a32014-03-03 19:11:34 +01005888 /* the driver is a block filter but does not allow recursion -> return false
5889 */
5890 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01005891}
5892
5893/* This function checks whether the candidate is the first non-filter bs down
5894 * its bs chain. Since we don't have pointers to parents, it explores all bs
5895 * chains from the top. Some filters can choose not to pass down the recursion.
5896 */
5897bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5898{
5899 BlockDriverState *bs;
5900
5901 /* walk down the bs forest recursively */
5902 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5903 bool perm;
5904
Benoît Canetb5042a32014-03-03 19:11:34 +01005905 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005906 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005907
5908 /* candidate is the first non filter */
5909 if (perm) {
5910 return true;
5911 }
5912 }
5913
5914 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005915}
Benoît Canet09158f02014-06-27 18:25:25 +02005916
5917BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5918{
5919 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5920 if (!to_replace_bs) {
5921 error_setg(errp, "Node name '%s' not found", node_name);
5922 return NULL;
5923 }
5924
5925 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5926 return NULL;
5927 }
5928
5929 /* We don't want an arbitrary node of the BDS chain to be replaced, only the
5930 * topmost non-filter, in order to prevent data corruption.
5931 * Another benefit is that this test excludes backing files, which are
5932 * blocked by the backing blockers.
5933 */
5934 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5935 error_setg(errp, "Only top most non filter can be replaced");
5936 return NULL;
5937 }
5938
5939 return to_replace_bs;
5940}
Ming Lei448ad912014-07-04 18:04:33 +08005941
5942void bdrv_io_plug(BlockDriverState *bs)
5943{
5944 BlockDriver *drv = bs->drv;
5945 if (drv && drv->bdrv_io_plug) {
5946 drv->bdrv_io_plug(bs);
5947 } else if (bs->file) {
5948 bdrv_io_plug(bs->file);
5949 }
5950}
5951
5952void bdrv_io_unplug(BlockDriverState *bs)
5953{
5954 BlockDriver *drv = bs->drv;
5955 if (drv && drv->bdrv_io_unplug) {
5956 drv->bdrv_io_unplug(bs);
5957 } else if (bs->file) {
5958 bdrv_io_unplug(bs->file);
5959 }
5960}
5961
5962void bdrv_flush_io_queue(BlockDriverState *bs)
5963{
5964 BlockDriver *drv = bs->drv;
5965 if (drv && drv->bdrv_flush_io_queue) {
5966 drv->bdrv_flush_io_queue(bs);
5967 } else if (bs->file) {
5968 bdrv_flush_io_queue(bs->file);
5969 }
5970}
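
/* Usage sketch (not part of the original file): request batching brackets a
 * burst of submissions with bdrv_io_plug()/bdrv_io_unplug() so drivers that
 * support it (e.g. Linux AIO) can hand them to the kernel in one go. The
 * function name is hypothetical.
 */
static void example_submit_write_batch(BlockDriverState *bs,
                                       int64_t sector_num,
                                       QEMUIOVector *qiovs, int nb_reqs,
                                       int sectors_per_req,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque)
{
    int i;

    bdrv_io_plug(bs);
    for (i = 0; i < nb_reqs; i++) {
        bdrv_aio_writev(bs, sector_num + (int64_t)i * sectors_per_req,
                        &qiovs[i], sectors_per_req, cb, opaque);
    }
    bdrv_io_unplug(bs);
}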
Max Reitz91af7012014-07-18 20:24:56 +02005971
5972static bool append_open_options(QDict *d, BlockDriverState *bs)
5973{
5974 const QDictEntry *entry;
5975 bool found_any = false;
5976
5977 for (entry = qdict_first(bs->options); entry;
5978 entry = qdict_next(bs->options, entry))
5979 {
5980 /* Only take options for this level and exclude all non-driver-specific
5981 * options */
5982 if (!strchr(qdict_entry_key(entry), '.') &&
5983 strcmp(qdict_entry_key(entry), "node-name"))
5984 {
5985 qobject_incref(qdict_entry_value(entry));
5986 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5987 found_any = true;
5988 }
5989 }
5990
5991 return found_any;
5992}
5993
5994/* Updates the following BDS fields:
5995 * - exact_filename: A filename which may be used for opening a block device
5996 * which (mostly) equals the given BDS (even without any
5997 * other options; so reading and writing must return the same
5998 * results, but caching etc. may be different)
5999 * - full_open_options: Options which, when given when opening a block device
6000 * (without a filename), result in a BDS (mostly)
6001 * equalling the given one
6002 * - filename: If exact_filename is set, it is copied here. Otherwise,
6003 * full_open_options is converted to a JSON object, prefixed with
6004 * "json:" (for use through the JSON pseudo protocol) and put here.
6005 */
6006void bdrv_refresh_filename(BlockDriverState *bs)
6007{
6008 BlockDriver *drv = bs->drv;
6009 QDict *opts;
6010
6011 if (!drv) {
6012 return;
6013 }
6014
6015 /* This BDS's file name will most probably depend on its file's name, so
6016 * refresh that first */
6017 if (bs->file) {
6018 bdrv_refresh_filename(bs->file);
6019 }
6020
6021 if (drv->bdrv_refresh_filename) {
6022 /* Obsolete information is of no use here, so drop the old file name
6023 * information before refreshing it */
6024 bs->exact_filename[0] = '\0';
6025 if (bs->full_open_options) {
6026 QDECREF(bs->full_open_options);
6027 bs->full_open_options = NULL;
6028 }
6029
6030 drv->bdrv_refresh_filename(bs);
6031 } else if (bs->file) {
6032 /* Try to reconstruct valid information from the underlying file */
6033 bool has_open_options;
6034
6035 bs->exact_filename[0] = '\0';
6036 if (bs->full_open_options) {
6037 QDECREF(bs->full_open_options);
6038 bs->full_open_options = NULL;
6039 }
6040
6041 opts = qdict_new();
6042 has_open_options = append_open_options(opts, bs);
6043
6044 /* If no specific options have been given for this BDS, the filename of
6045 * the underlying file should suffice for this one as well */
6046 if (bs->file->exact_filename[0] && !has_open_options) {
6047 strcpy(bs->exact_filename, bs->file->exact_filename);
6048 }
6049 /* Reconstructing the full options QDict is simple for most format block
6050 * drivers, as long as the full options are known for the underlying
6051 * file BDS. The full options QDict of that file BDS should somehow
6052 * contain a representation of the filename, therefore the following
6053 * suffices without querying the (exact_)filename of this BDS. */
6054 if (bs->file->full_open_options) {
6055 qdict_put_obj(opts, "driver",
6056 QOBJECT(qstring_from_str(drv->format_name)));
6057 QINCREF(bs->file->full_open_options);
6058 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6059
6060 bs->full_open_options = opts;
6061 } else {
6062 QDECREF(opts);
6063 }
6064 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6065 /* There is no underlying file BDS (at least referenced by BDS.file),
6066 * so the full options QDict should be equal to the options given
6067 * specifically for this block device when it was opened (plus the
6068 * driver specification).
6069 * Because those options don't change, there is no need to update
6070 * full_open_options when it's already set. */
6071
6072 opts = qdict_new();
6073 append_open_options(opts, bs);
6074 qdict_put_obj(opts, "driver",
6075 QOBJECT(qstring_from_str(drv->format_name)));
6076
6077 if (bs->exact_filename[0]) {
6078 /* This may not work for all block protocol drivers (some may
6079 * require this filename to be parsed), but we have to find some
6080 * default solution here, so just include it. If some block driver
6081 * does not support pure options without any filename at all or
6082 * needs some special format of the options QDict, it needs to
6083 * implement the driver-specific bdrv_refresh_filename() function.
6084 */
6085 qdict_put_obj(opts, "filename",
6086 QOBJECT(qstring_from_str(bs->exact_filename)));
6087 }
6088
6089 bs->full_open_options = opts;
6090 }
6091
6092 if (bs->exact_filename[0]) {
6093 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6094 } else if (bs->full_open_options) {
6095 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6096 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6097 qstring_get_str(json));
6098 QDECREF(json);
6099 }
6100}
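
/* Illustration (not from the original file): when no plain filename can
 * represent the node, bs->filename is built from full_open_options using the
 * "json:" pseudo-protocol, e.g. (values hypothetical):
 *
 *     json:{"driver": "qcow2", "file": {"driver": "file",
 *                                       "filename": "/tmp/test.qcow2"}}
 */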
Benoît Canet5366d0c2014-09-05 15:46:18 +02006101
6102/* The purpose of this accessor function is to allow device models to access
6103 * the BlockAcctStats structure embedded inside a BlockDriverState without
6104 * being aware of the BlockDriverState structure layout.
6105 * It will go away once the BlockAcctStats structure is moved inside
6106 * the device models.
6107 */
6108BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6109{
6110 return &bs->stats;
6111}