/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor/monitor.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

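/*
 * Illustrative note (not part of the original source): per the checks above,
 * is_windows_drive() accepts plain drive names such as "c:" as well as
 * device paths beginning with "\\.\" or "//./"; a longer path like
 * "c:\disk.img" only matches is_windows_drive_prefix().
 */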
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

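/*
 * Illustrative sketch (not part of the original source): a caller enables
 * throttling first and then applies a configuration, matching the ordering
 * requirement documented above bdrv_io_limits_enable():
 *
 *     ThrottleConfig cfg;
 *     memset(&cfg, 0, sizeof(cfg));
 *     (fill cfg with the desired bps/iops limits)
 *     bdrv_io_limits_enable(bs);
 *     bdrv_set_io_limits(bs, &cfg);
 */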
/* This function makes an I/O request wait if needed
 *
 * @bytes:      the number of bytes of the I/O
 * @is_write:   is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* must this I/O wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue the I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue the next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

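/*
 * Illustrative note (not part of the original source): the coroutine
 * read/write paths further down in this file are expected to call
 * bdrv_io_limits_intercept() before submitting a request whenever
 * bs->io_limits_enabled is set, so a request simply blocks in the queue
 * above until the throttle lets it through.
 */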
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

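/*
 * Illustrative sketch (not part of the original source): callers that build
 * their own I/O buffers can honor this alignment, e.g.:
 *
 *     void *buf = qemu_memalign(bdrv_opt_mem_align(bs), len);
 *
 * which is the pattern helpers such as qemu_blockalign() (elsewhere in this
 * file) are built around.
 */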
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

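/*
 * Example use of path_combine() (illustrative, not part of the original
 * source): combining a relative backing file name with the path of its
 * overlay:
 *
 *     char dest[PATH_MAX];
 *     path_combine(dest, sizeof(dest), "/images/overlay.qcow2", "base.qcow2");
 *
 * dest now contains "/images/base.qcow2"; an absolute filename would have
 * been copied through unchanged.
 */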
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;

    return bs;
}

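/*
 * Illustrative sketch (not part of the original source): most callers create
 * a BDS indirectly through bdrv_open(), which calls bdrv_new("") itself when
 * *pbs is NULL (see the comment above bdrv_open() further down), e.g.:
 *
 *     BlockDriverState *bs = NULL;
 *     Error *err = NULL;
 *     int ret = bdrv_open(&bs, "disk.qcow2", NULL, NULL, BDRV_O_RDWR,
 *                         NULL, &err);
 *
 * "disk.qcow2" is a hypothetical file name used only for illustration.
 */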
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
                     Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

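/*
 * Illustrative sketch (not part of the original source): creating an image
 * with an explicit driver mirrors what the snapshot code in bdrv_open()
 * does further down in this file:
 *
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     QEMUOptionParameter *opts =
 *         parse_option_parameters("", drv->create_options, NULL);
 *     set_option_parameter_int(opts, BLOCK_OPT_SIZE, size_in_bytes);
 *     ret = bdrv_create(drv, "new.qcow2", opts, &err);
 *     free_option_parameters(opts);
 *
 * "new.qcow2", size_in_bytes and err are placeholders for illustration only.
 */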
int bdrv_refresh_limits(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return 0;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file);
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd);
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        return drv->bdrv_refresh_limits(bs);
    }

    return 0;
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

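/*
 * Illustrative note (not part of the original source): with the logic above,
 * bdrv_find_protocol("nbd:localhost:10809", true) would resolve to the
 * driver whose protocol_name is "nbd" (assuming it is built in), while a
 * plain path such as "disk.img" (or any filename when allow_protocol_prefix
 * is false) falls back to the "file" driver.
 */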
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

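/*
 * Example (illustrative, not part of the original source): parsing the
 * discard mode "unmap" sets BDRV_O_UNMAP in the open flags:
 *
 *     int flags = 0;
 *     bdrv_parse_discard_flags("unmap", &flags);   // flags == BDRV_O_UNMAP
 */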
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

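/*
 * Example (illustrative, not part of the original source): the common
 * cache=none mode maps to writeback caching plus O_DIRECT:
 *
 *     int flags = 0;
 *     bdrv_parse_cache_flags("none", &flags);
 *     // flags == (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
 */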
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

static int bdrv_assign_node_name(BlockDriverState *bs,
                                 const char *node_name,
                                 Error **errp)
{
    if (!node_name) {
        return 0;
    }

    /* an empty string as a node name is invalid */
    if (node_name[0] == '\0') {
        error_setg(errp, "Empty node name");
        return -EINVAL;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return -EINVAL;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return -EINVAL;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);

    return 0;
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    ret = bdrv_assign_node_name(bs, node_name, errp);
    if (ret < 0) {
        return ret;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called directly with a protocol as drv. This layer is
     * already opened, so assign it to bs (while file becomes a closed
     * BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs);
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bs->request_alignment != 0);

#ifndef _WIN32
    if (bs->is_temporary) {
        assert(bs->filename[0] != '\0');
        unlink(bs->filename);
    }
#endif
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * options is an indirect pointer to a QDict of options to pass to the block
 * drivers, or a pointer to NULL for an empty set of options. If this function
 * takes ownership of the QDict reference, it will set *options to NULL;
 * otherwise, it will contain unused/unrecognized options after this function
 * returns. Then, the caller is responsible for freeing it. If it intends to
 * reuse the QDict, QINCREF() should be called beforehand.
 */
static int bdrv_file_open(BlockDriverState *bs, const char *filename,
                          QDict **options, int flags, Error **errp)
{
    BlockDriver *drv;
    const char *drvname;
    bool allow_protocol_prefix = false;
    Error *local_err = NULL;
    int ret;

    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(*options, "filename");
    } else if (filename && !qdict_haskey(*options, "filename")) {
        qdict_put(*options, "filename", qstring_from_str(filename));
        allow_protocol_prefix = true;
    } else {
        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
                   "same time");
        ret = -EINVAL;
        goto fail;
    }

    /* Find the right block driver */
    drvname = qdict_get_try_str(*options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
        }
        qdict_del(*options, "driver");
    } else if (filename) {
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
            error_setg(errp, "Unknown protocol");
        }
    } else {
        error_setg(errp, "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        /* errp has been set already */
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            ret = -EINVAL;
            goto fail;
        }
        qdict_del(*options, "filename");
    }

    if (!drv->bdrv_file_open) {
        ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
        *options = NULL;
    } else {
        ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
    }
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    bs->growable = 1;
    return 0;

fail:
    return ret;
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file().
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
    }

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files are always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
                                    BDRV_O_COPY_ON_READ);

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    back_flags, back_drv, &local_err);
    if (ret < 0) {
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        return ret;
    }

    if (bs->backing_hd->file) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
                bs->backing_hd->file->filename);
    }

    /* Recalculate the BlockLimits with the backing file */
    bdrv_refresh_limits(bs);

    return 0;
}

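/*
 * Illustrative sketch (not part of the original source): the tail of
 * bdrv_open() (beyond this excerpt) is the typical caller; with no extra
 * backing options it would boil down to something like:
 *
 *     ret = bdrv_open_backing_file(bs, NULL, &local_err);
 *
 * relying on the NULL-means-empty-options convention documented above.
 */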
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001109/*
Max Reitzda557aa2013-12-20 19:28:11 +01001110 * Opens a disk image whose options are given as BlockdevRef in another block
1111 * device's options.
1112 *
Max Reitzda557aa2013-12-20 19:28:11 +01001113 * If allow_none is true, no image will be opened if filename is false and no
1114 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1115 *
1116 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1117 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1118 * itself, all options starting with "${bdref_key}." are considered part of the
1119 * BlockdevRef.
1120 *
1121 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001122 *
1123 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001124 */
1125int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1126 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001127 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001128{
1129 QDict *image_options;
1130 int ret;
1131 char *bdref_key_dot;
1132 const char *reference;
1133
Max Reitzf67503e2014-02-18 18:33:05 +01001134 assert(pbs);
1135 assert(*pbs == NULL);
1136
Max Reitzda557aa2013-12-20 19:28:11 +01001137 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1138 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1139 g_free(bdref_key_dot);
1140
1141 reference = qdict_get_try_str(options, bdref_key);
1142 if (!filename && !reference && !qdict_size(image_options)) {
1143 if (allow_none) {
1144 ret = 0;
1145 } else {
1146 error_setg(errp, "A block device must be specified for \"%s\"",
1147 bdref_key);
1148 ret = -EINVAL;
1149 }
1150 goto done;
1151 }
1152
Max Reitzf7d9fd82014-02-18 18:33:12 +01001153 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001154
1155done:
1156 qdict_del(options, bdref_key);
1157 return ret;
1158}
1159
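/*
 * Illustrative sketch (not part of the original source): bdrv_open() uses
 * this below with bdref_key "file"; a caller handling a flattened
 * BlockdevRef for a backing image might similarly do:
 *
 *     BlockDriverState *backing = NULL;
 *     ret = bdrv_open_image(&backing, NULL, options, "backing",
 *                           flags, true, &local_err);
 *
 * where options could contain e.g. "backing.driver" and
 * "backing.file.filename" entries ("backing" is a hypothetical bdref_key
 * here).
 */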
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;

    assert(pbs);

    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new("");
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
    options = qdict_clone_shallow(options);

    if (flags & BDRV_O_PROTOCOL) {
        assert(!drv);
        ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
                             &local_err);
        if (!ret) {
            goto done;
        } else if (bs->drv) {
            goto close_and_fail;
        } else {
            goto fail;
        }
    }

    /* For snapshot=on, create a temporary qcow2 overlay */
    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *create_options;
        QDict *snapshot_options;

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* Get the required size from the image */
        QINCREF(options);
        bs1 = NULL;
        ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
                        drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        bdrv_unref(bs1);

        /* Create the temporary image */
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not get temporary filename");
            goto fail;
        }

        bdrv_qcow2 = bdrv_find_format("qcow2");
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);

        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);

        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
        free_option_parameters(create_options);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not create temporary overlay "
                             "'%s': %s", tmp_filename,
                             error_get_pretty(local_err));
            error_free(local_err);
            local_err = NULL;
            goto fail;
        }

        /* Prepare a new options QDict for the temporary file, where user
         * options refer to the backing file */
        if (filename) {
            qdict_put(options, "file.filename", qstring_from_str(filename));
        }
        if (drv) {
            qdict_put(options, "driver", qstring_from_str(drv->format_name));
        }

        snapshot_options = qdict_new();
        qdict_put(snapshot_options, "backing", options);
        qdict_flatten(snapshot_options);

        bs->options = snapshot_options;
        options = qdict_clone_shallow(bs->options);

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

    assert(file == NULL);
    ret = bdrv_open_image(&file, filename, options, "file",
                          bdrv_open_flags(bs, flags | BDRV_O_UNMAP) |
                          BDRV_O_PROTOCOL, true, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* Find the right image format driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Invalid driver: '%s'", drvname);
            ret = -EINVAL;
            goto unlink_and_fail;
        }
    }

    if (!drv) {
        if (file) {
            ret = find_image_format(file, filename, &drv, &local_err);
        } else {
            error_setg(errp, "Must specify either driver or file");
            ret = -EINVAL;
            goto unlink_and_fail;
        }
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001359 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001360 QDict *backing_options;
1361
Benoît Canet5726d872013-09-25 13:30:01 +02001362 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001363 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001364 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001365 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001366 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001367 }
1368
Max Reitz5acd9d82014-02-18 18:33:11 +01001369done:
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001370 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001371 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001372 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001373 if (flags & BDRV_O_PROTOCOL) {
1374 error_setg(errp, "Block protocol '%s' doesn't support the option "
1375 "'%s'", drv->format_name, entry->key);
1376 } else {
1377 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1378 "support the option '%s'", drv->format_name,
1379 bs->device_name, entry->key);
1380 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001381
1382 ret = -EINVAL;
1383 goto close_and_fail;
1384 }
1385 QDECREF(options);
1386
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001387 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001388 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001389 }
1390
Max Reitzf67503e2014-02-18 18:33:05 +01001391 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001392 return 0;
1393
1394unlink_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001395 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001396 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001397 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001398 if (bs->is_temporary) {
1399 unlink(filename);
1400 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001401fail:
1402 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001403 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001404 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001405 if (!*pbs) {
1406 /* If *pbs is NULL, a new BDS has been created in this function and
1407 needs to be freed now. Otherwise, it does not need to be closed,
1408 since it has not really been opened yet. */
1409 bdrv_unref(bs);
1410 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001411 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001412 error_propagate(errp, local_err);
1413 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001414 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001415
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001416close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001417 /* See fail path, but now the BDS always has to be closed */
1418 if (*pbs) {
1419 bdrv_close(bs);
1420 } else {
1421 bdrv_unref(bs);
1422 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001423 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001424 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001425 error_propagate(errp, local_err);
1426 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001427 return ret;
1428}
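
/*
 * Illustrative usage sketch, not part of the original file: a minimal caller
 * of bdrv_open() that selects the format through the options QDict rather
 * than a BlockDriver argument.  The qcow2 format name and the helper name
 * are assumptions for the example only.
 */
static BlockDriverState *example_open_image(const char *filename, Error **errp)
{
    BlockDriverState *bs = NULL;
    QDict *opts = qdict_new();

    qdict_put(opts, "driver", qstring_from_str("qcow2"));

    /* bdrv_open() takes ownership of opts and sets *pbs on success */
    if (bdrv_open(&bs, filename, NULL, opts, BDRV_O_RDWR, NULL, errp) < 0) {
        return NULL;
    }
    return bs;
}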
1429
Jeff Codye971aa12012-09-20 15:13:19 -04001430typedef struct BlockReopenQueueEntry {
1431 bool prepared;
1432 BDRVReopenState state;
1433 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1434} BlockReopenQueueEntry;
1435
1436/*
1437 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1438 * reopen of multiple devices.
1439 *
1440 * bs_queue can either be an existing BlockReopenQueue that has had
1441 * QSIMPLEQ_INIT already performed, or it may be NULL, in which case a new
1442 * BlockReopenQueue will be created and initialized. This newly created
1443 * BlockReopenQueue should be passed back in for subsequent calls of the same
1444 * atomic 'set'.
1445 *
1446 * bs is the BlockDriverState to add to the reopen queue.
1447 *
1448 * flags contains the open flags for the associated bs
1449 *
1450 * returns a pointer to bs_queue, which is either the newly allocated
1451 * bs_queue, or the existing bs_queue being used.
1452 *
1453 */
1454BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1455 BlockDriverState *bs, int flags)
1456{
1457 assert(bs != NULL);
1458
1459 BlockReopenQueueEntry *bs_entry;
1460 if (bs_queue == NULL) {
1461 bs_queue = g_new0(BlockReopenQueue, 1);
1462 QSIMPLEQ_INIT(bs_queue);
1463 }
1464
1465 if (bs->file) {
1466 bdrv_reopen_queue(bs_queue, bs->file, flags);
1467 }
1468
1469 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1470 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1471
1472 bs_entry->state.bs = bs;
1473 bs_entry->state.flags = flags;
1474
1475 return bs_queue;
1476}
1477
1478/*
1479 * Reopen multiple BlockDriverStates atomically & transactionally.
1480 *
1481 * The queue passed in (bs_queue) must have been built up previously
1482 * via bdrv_reopen_queue().
1483 *
1484 * Reopens all BDS specified in the queue, with the appropriate
1485 * flags. All devices are prepared for reopen, and failure of any
1486 * device will cause all device changes to be abandoned, and intermediate
1487 * data cleaned up.
1488 *
1489 * If all devices prepare successfully, then the changes are committed
1490 * to all devices.
1491 *
1492 */
1493int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1494{
1495 int ret = -1;
1496 BlockReopenQueueEntry *bs_entry, *next;
1497 Error *local_err = NULL;
1498
1499 assert(bs_queue != NULL);
1500
1501 bdrv_drain_all();
1502
1503 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1504 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1505 error_propagate(errp, local_err);
1506 goto cleanup;
1507 }
1508 bs_entry->prepared = true;
1509 }
1510
1511 /* If we reach this point, we have success and just need to apply the
1512 * changes
1513 */
1514 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1515 bdrv_reopen_commit(&bs_entry->state);
1516 }
1517
1518 ret = 0;
1519
1520cleanup:
1521 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1522 if (ret && bs_entry->prepared) {
1523 bdrv_reopen_abort(&bs_entry->state);
1524 }
1525 g_free(bs_entry);
1526 }
1527 g_free(bs_queue);
1528 return ret;
1529}
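
/*
 * Illustrative usage sketch, not part of the original file: reopening two
 * devices with new open flags in one atomic transaction.  new_flags is the
 * complete new flag set, and the helper name is an assumption.
 */
static int example_reopen_two(BlockDriverState *bs_a, BlockDriverState *bs_b,
                              int new_flags, Error **errp)
{
    /* The queue is created by the first call and extended by the second;
     * bs->file of each device is queued implicitly. */
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs_a, new_flags);
    queue = bdrv_reopen_queue(queue, bs_b, new_flags);

    /* Prepares all entries, commits on success, rolls back on failure,
     * and frees the queue in either case. */
    return bdrv_reopen_multiple(queue, errp);
}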
1530
1531
1532/* Reopen a single BlockDriverState with the specified flags. */
1533int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1534{
1535 int ret = -1;
1536 Error *local_err = NULL;
1537 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1538
1539 ret = bdrv_reopen_multiple(queue, &local_err);
1540 if (local_err != NULL) {
1541 error_propagate(errp, local_err);
1542 }
1543 return ret;
1544}
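
/*
 * Illustrative usage sketch, not part of the original file: temporarily
 * upgrading a BDS to read-write and restoring its original flags, the same
 * pattern bdrv_commit() uses below for the backing file.
 */
static int example_with_rdwr(BlockDriverState *bs, Error **errp)
{
    int orig_flags = bs->open_flags;
    int ret;

    ret = bdrv_reopen(bs, orig_flags | BDRV_O_RDWR, errp);
    if (ret < 0) {
        return ret;
    }

    /* ... perform the accesses that required write permission here ... */

    return bdrv_reopen(bs, orig_flags, errp);
}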
1545
1546
1547/*
1548 * Prepares a BlockDriverState for reopen. All changes are staged in the
1549 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1550 * the block driver's .bdrv_reopen_prepare() implementation.
1551 *
1552 * bs is the BlockDriverState to reopen
1553 * flags are the new open flags
1554 * queue is the reopen queue
1555 *
1556 * Returns 0 on success, non-zero on error. On error errp will be set
1557 * as well.
1558 *
1559 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1560 * It is the responsibility of the caller to then call bdrv_reopen_abort() or
1561 * bdrv_reopen_commit() for any other BDS that have been left in the prepare() state.
1562 *
1563 */
1564int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1565 Error **errp)
1566{
1567 int ret = -1;
1568 Error *local_err = NULL;
1569 BlockDriver *drv;
1570
1571 assert(reopen_state != NULL);
1572 assert(reopen_state->bs->drv != NULL);
1573 drv = reopen_state->bs->drv;
1574
1575 /* if we are to stay read-only, do not allow permission change
1576 * to r/w */
1577 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1578 reopen_state->flags & BDRV_O_RDWR) {
1579 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1580 reopen_state->bs->device_name);
1581 goto error;
1582 }
1583
1584
1585 ret = bdrv_flush(reopen_state->bs);
1586 if (ret) {
1587 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1588 strerror(-ret));
1589 goto error;
1590 }
1591
1592 if (drv->bdrv_reopen_prepare) {
1593 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1594 if (ret) {
1595 if (local_err != NULL) {
1596 error_propagate(errp, local_err);
1597 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001598 error_setg(errp, "failed while preparing to reopen image '%s'",
1599 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001600 }
1601 goto error;
1602 }
1603 } else {
1604 /* It is currently mandatory to have a bdrv_reopen_prepare()
1605 * handler for each supported drv. */
1606 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1607 drv->format_name, reopen_state->bs->device_name,
1608 "reopening of file");
1609 ret = -1;
1610 goto error;
1611 }
1612
1613 ret = 0;
1614
1615error:
1616 return ret;
1617}
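
/*
 * Illustrative sketch, not part of the original file: the minimal
 * .bdrv_reopen_prepare handler a format driver with no reopen-sensitive
 * state could install to satisfy the mandatory-handler requirement above.
 * The function name is an assumption.
 */
static int example_drv_reopen_prepare(BDRVReopenState *state,
                                      BlockReopenQueue *queue, Error **errp)
{
    /* Nothing to stage; accept the flag change and let the generic
     * bdrv_reopen_commit()/bdrv_reopen_abort() code do the rest. */
    return 0;
}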
1618
1619/*
1620 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1621 * makes them final by swapping the staging BlockDriverState contents into
1622 * the active BlockDriverState contents.
1623 */
1624void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1625{
1626 BlockDriver *drv;
1627
1628 assert(reopen_state != NULL);
1629 drv = reopen_state->bs->drv;
1630 assert(drv != NULL);
1631
1632 /* If there are any driver level actions to take */
1633 if (drv->bdrv_reopen_commit) {
1634 drv->bdrv_reopen_commit(reopen_state);
1635 }
1636
1637 /* set BDS specific flags now */
1638 reopen_state->bs->open_flags = reopen_state->flags;
1639 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1640 BDRV_O_CACHE_WB);
1641 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001642
1643 bdrv_refresh_limits(reopen_state->bs);
Jeff Codye971aa12012-09-20 15:13:19 -04001644}
1645
1646/*
1647 * Abort the reopen, and delete and free the staged changes in
1648 * reopen_state
1649 */
1650void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1651{
1652 BlockDriver *drv;
1653
1654 assert(reopen_state != NULL);
1655 drv = reopen_state->bs->drv;
1656 assert(drv != NULL);
1657
1658 if (drv->bdrv_reopen_abort) {
1659 drv->bdrv_reopen_abort(reopen_state);
1660 }
1661}
1662
1663
bellardfc01f7e2003-06-30 10:03:06 +00001664void bdrv_close(BlockDriverState *bs)
1665{
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001666 if (bs->job) {
1667 block_job_cancel_sync(bs->job);
1668 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001669 bdrv_drain_all(); /* complete I/O */
1670 bdrv_flush(bs);
1671 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001672 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001673
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001674 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001675 if (bs->backing_hd) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001676 bdrv_unref(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001677 bs->backing_hd = NULL;
1678 }
bellardea2384d2004-08-01 21:59:26 +00001679 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001680 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001681#ifdef _WIN32
1682 if (bs->is_temporary) {
1683 unlink(bs->filename);
1684 }
bellard67b915a2004-03-31 23:37:16 +00001685#endif
bellardea2384d2004-08-01 21:59:26 +00001686 bs->opaque = NULL;
1687 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001688 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001689 bs->backing_file[0] = '\0';
1690 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001691 bs->total_sectors = 0;
1692 bs->encrypted = 0;
1693 bs->valid_key = 0;
1694 bs->sg = 0;
1695 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001696 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001697 QDECREF(bs->options);
1698 bs->options = NULL;
bellardb3380822004-03-14 21:38:54 +00001699
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001700 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001701 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001702 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001703 }
bellardb3380822004-03-14 21:38:54 +00001704 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001705
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001706 bdrv_dev_change_media_cb(bs, false);
1707
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001708 /*throttling disk I/O limits*/
1709 if (bs->io_limits_enabled) {
1710 bdrv_io_limits_disable(bs);
1711 }
bellardb3380822004-03-14 21:38:54 +00001712}
1713
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001714void bdrv_close_all(void)
1715{
1716 BlockDriverState *bs;
1717
Benoît Canetdc364f42014-01-23 21:31:32 +01001718 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001719 bdrv_close(bs);
1720 }
1721}
1722
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001723/* Check if any requests are in-flight (including throttled requests) */
1724static bool bdrv_requests_pending(BlockDriverState *bs)
1725{
1726 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1727 return true;
1728 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001729 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1730 return true;
1731 }
1732 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001733 return true;
1734 }
1735 if (bs->file && bdrv_requests_pending(bs->file)) {
1736 return true;
1737 }
1738 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1739 return true;
1740 }
1741 return false;
1742}
1743
1744static bool bdrv_requests_pending_all(void)
1745{
1746 BlockDriverState *bs;
Benoît Canetdc364f42014-01-23 21:31:32 +01001747 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001748 if (bdrv_requests_pending(bs)) {
1749 return true;
1750 }
1751 }
1752 return false;
1753}
1754
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001755/*
1756 * Wait for pending requests to complete across all BlockDriverStates
1757 *
1758 * This function does not flush data to disk, use bdrv_flush_all() for that
1759 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001760 *
1761 * Note that completion of an asynchronous I/O operation can trigger any
1762 * number of other I/O operations on other devices---for example a coroutine
1763 * can be arbitrarily complex and a constant flow of I/O can come until the
1764 * coroutine is complete. Because of this, it is not possible to have a
1765 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001766 */
1767void bdrv_drain_all(void)
1768{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001769 /* Always run first iteration so any pending completion BHs run */
1770 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001771 BlockDriverState *bs;
1772
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001773 while (busy) {
Benoît Canetdc364f42014-01-23 21:31:32 +01001774 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi0b06ef32013-11-26 16:18:00 +01001775 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001776 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001777
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001778 busy = bdrv_requests_pending_all();
1779 busy |= aio_poll(qemu_get_aio_context(), busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001780 }
1781}
1782
Benoît Canetdc364f42014-01-23 21:31:32 +01001783/* make a BlockDriverState anonymous by removing it from the bdrv_states and
1784 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001785 Also, NUL-terminate the device_name to prevent a double remove */
1786void bdrv_make_anon(BlockDriverState *bs)
1787{
1788 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001789 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001790 }
1791 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001792 if (bs->node_name[0] != '\0') {
1793 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1794 }
1795 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001796}
1797
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001798static void bdrv_rebind(BlockDriverState *bs)
1799{
1800 if (bs->drv && bs->drv->bdrv_rebind) {
1801 bs->drv->bdrv_rebind(bs);
1802 }
1803}
1804
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001805static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1806 BlockDriverState *bs_src)
1807{
1808 /* move some fields that need to stay attached to the device */
1809 bs_dest->open_flags = bs_src->open_flags;
1810
1811 /* dev info */
1812 bs_dest->dev_ops = bs_src->dev_ops;
1813 bs_dest->dev_opaque = bs_src->dev_opaque;
1814 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001815 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001816 bs_dest->copy_on_read = bs_src->copy_on_read;
1817
1818 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1819
Benoît Canetcc0681c2013-09-02 14:14:39 +02001820 /* i/o throttled req */
1821 memcpy(&bs_dest->throttle_state,
1822 &bs_src->throttle_state,
1823 sizeof(ThrottleState));
1824 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1825 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001826 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1827
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001828 /* r/w error */
1829 bs_dest->on_read_error = bs_src->on_read_error;
1830 bs_dest->on_write_error = bs_src->on_write_error;
1831
1832 /* i/o status */
1833 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1834 bs_dest->iostatus = bs_src->iostatus;
1835
1836 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08001837 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001838
Fam Zheng9fcb0252013-08-23 09:14:46 +08001839 /* reference count */
1840 bs_dest->refcnt = bs_src->refcnt;
1841
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001842 /* job */
1843 bs_dest->in_use = bs_src->in_use;
1844 bs_dest->job = bs_src->job;
1845
1846 /* keep the same entry in bdrv_states */
1847 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1848 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01001849 bs_dest->device_list = bs_src->device_list;
1850
1851 /* keep the same entry in graph_bdrv_states
1852 * We do want to swap name but don't want to swap linked list entries
1853 */
1854 bs_dest->node_list = bs_src->node_list;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001855}
1856
1857/*
1858 * Swap bs contents for two image chains while they are live,
1859 * while keeping required fields on the BlockDriverState that is
1860 * actually attached to a device.
1861 *
1862 * This will modify the BlockDriverState fields, and swap contents
1863 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1864 *
1865 * bs_new is required to be anonymous.
1866 *
1867 * This function does not create any image files.
1868 */
1869void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1870{
1871 BlockDriverState tmp;
1872
1873 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1874 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08001875 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001876 assert(bs_new->job == NULL);
1877 assert(bs_new->dev == NULL);
1878 assert(bs_new->in_use == 0);
1879 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001880 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001881
1882 tmp = *bs_new;
1883 *bs_new = *bs_old;
1884 *bs_old = tmp;
1885
1886 /* there are some fields that should not be swapped, move them back */
1887 bdrv_move_feature_fields(&tmp, bs_old);
1888 bdrv_move_feature_fields(bs_old, bs_new);
1889 bdrv_move_feature_fields(bs_new, &tmp);
1890
1891 /* bs_new shouldn't be in bdrv_states even after the swap! */
1892 assert(bs_new->device_name[0] == '\0');
1893
1894 /* Check a few fields that should remain attached to the device */
1895 assert(bs_new->dev == NULL);
1896 assert(bs_new->job == NULL);
1897 assert(bs_new->in_use == 0);
1898 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001899 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001900
1901 bdrv_rebind(bs_new);
1902 bdrv_rebind(bs_old);
1903}
1904
Jeff Cody8802d1f2012-02-28 15:54:06 -05001905/*
1906 * Add new bs contents at the top of an image chain while the chain is
1907 * live, while keeping required fields on the top layer.
1908 *
1909 * This will modify the BlockDriverState fields, and swap contents
1910 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1911 *
Jeff Codyf6801b82012-03-27 16:30:19 -04001912 * bs_new is required to be anonymous.
1913 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05001914 * This function does not create any image files.
1915 */
1916void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1917{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001918 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001919
1920 /* After the swap, bs_top contains the new image and bs_new holds what
1921 * used to be the active layer; link it in as bs_top's backing file. */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001922 bs_top->backing_hd = bs_new;
1923 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1924 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1925 bs_new->filename);
1926 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1927 bs_new->drv ? bs_new->drv->format_name : "");
Jeff Cody8802d1f2012-02-28 15:54:06 -05001928}
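
/*
 * Illustrative usage sketch, not part of the original file: the external
 * snapshot pattern.  A pre-created qcow2 overlay (assumed to name bs_top's
 * image as its backing file) is opened without its backing chain and then
 * inserted above the active layer with bdrv_append().
 */
static int example_take_external_snapshot(BlockDriverState *bs_top,
                                          const char *overlay_file,
                                          Error **errp)
{
    BlockDriverState *new_bs = NULL;
    BlockDriver *drv = bdrv_find_format("qcow2");
    int ret;

    ret = bdrv_open(&new_bs, overlay_file, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_NO_BACKING, drv, errp);
    if (ret < 0) {
        return ret;
    }

    /* bs_top now holds the overlay; new_bs holds the former active layer
     * and serves as its backing file. */
    bdrv_append(new_bs, bs_top);
    return 0;
}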
1929
Fam Zheng4f6fd342013-08-23 09:14:47 +08001930static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00001931{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001932 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001933 assert(!bs->job);
1934 assert(!bs->in_use);
Fam Zheng4f6fd342013-08-23 09:14:47 +08001935 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08001936 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02001937
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02001938 bdrv_close(bs);
1939
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001940 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001941 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001942
Anthony Liguori7267c092011-08-20 22:09:37 -05001943 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001944}
1945
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001946int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1947/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001948{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001949 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001950 return -EBUSY;
1951 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001952 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001953 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001954 return 0;
1955}
1956
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001957/* TODO qdevified devices don't use this, remove when devices are qdevified */
1958void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001959{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001960 if (bdrv_attach_dev(bs, dev) < 0) {
1961 abort();
1962 }
1963}
1964
1965void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1966/* TODO change to DeviceState *dev when all users are qdevified */
1967{
1968 assert(bs->dev == dev);
1969 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001970 bs->dev_ops = NULL;
1971 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001972 bs->guest_block_size = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001973}
1974
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001975/* TODO change to return DeviceState * when all users are qdevified */
1976void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001977{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001978 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001979}
1980
Markus Armbruster0e49de52011-08-03 15:07:41 +02001981void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1982 void *opaque)
1983{
1984 bs->dev_ops = ops;
1985 bs->dev_opaque = opaque;
1986}
1987
Paolo Bonzini32c81a42012-09-28 17:22:58 +02001988void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1989 enum MonitorEvent ev,
1990 BlockErrorAction action, bool is_read)
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001991{
1992 QObject *data;
1993 const char *action_str;
1994
1995 switch (action) {
1996 case BDRV_ACTION_REPORT:
1997 action_str = "report";
1998 break;
1999 case BDRV_ACTION_IGNORE:
2000 action_str = "ignore";
2001 break;
2002 case BDRV_ACTION_STOP:
2003 action_str = "stop";
2004 break;
2005 default:
2006 abort();
2007 }
2008
2009 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2010 bdrv->device_name,
2011 action_str,
2012 is_read ? "read" : "write");
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002013 monitor_protocol_event(ev, data);
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002014
2015 qobject_decref(data);
2016}
2017
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002018static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2019{
2020 QObject *data;
2021
2022 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2023 bdrv_get_device_name(bs), ejected);
2024 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2025
2026 qobject_decref(data);
2027}
2028
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002029static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002030{
Markus Armbruster145feb12011-08-03 15:07:42 +02002031 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002032 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002033 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002034 if (tray_was_closed) {
2035 /* tray open */
2036 bdrv_emit_qmp_eject_event(bs, true);
2037 }
2038 if (load) {
2039 /* tray close */
2040 bdrv_emit_qmp_eject_event(bs, false);
2041 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002042 }
2043}
2044
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002045bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2046{
2047 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2048}
2049
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002050void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2051{
2052 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2053 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2054 }
2055}
2056
Markus Armbrustere4def802011-09-06 18:58:53 +02002057bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2058{
2059 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2060 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2061 }
2062 return false;
2063}
2064
Markus Armbruster145feb12011-08-03 15:07:42 +02002065static void bdrv_dev_resize_cb(BlockDriverState *bs)
2066{
2067 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2068 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002069 }
2070}
2071
Markus Armbrusterf1076392011-09-06 18:58:46 +02002072bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2073{
2074 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2075 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2076 }
2077 return false;
2078}
2079
aliguorie97fc192009-04-21 23:11:50 +00002080/*
2081 * Run consistency checks on an image
2082 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002083 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002084 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002085 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002086 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002087int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002088{
2089 if (bs->drv->bdrv_check == NULL) {
2090 return -ENOTSUP;
2091 }
2092
Kevin Wolfe076f332010-06-29 11:43:13 +02002093 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002094 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002095}
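
/*
 * Illustrative usage sketch, not part of the original file: running a
 * report-only consistency check.  Repair modes (e.g. BDRV_FIX_LEAKS,
 * BDRV_FIX_ERRORS) could be passed as 'fix' instead of 0.
 */
static int example_check_image(BlockDriverState *bs, BdrvCheckResult *res)
{
    int ret = bdrv_check(bs, res, 0);

    if (ret == -ENOTSUP) {
        /* The format driver has no consistency check at all */
        return ret;
    }
    if (ret < 0) {
        /* Internal error; the contents of *res are not meaningful */
        return ret;
    }
    /* ret == 0: the check ran; *res now holds the counters reported by
     * tools such as qemu-img check */
    return 0;
}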
2096
Kevin Wolf8a426612010-07-16 17:17:01 +02002097#define COMMIT_BUF_SECTORS 2048
2098
bellard33e39632003-07-06 17:15:21 +00002099/* commit COW file into the raw image */
2100int bdrv_commit(BlockDriverState *bs)
2101{
bellard19cb3732006-08-19 11:45:59 +00002102 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002103 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002104 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002105 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002106 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002107 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002108
bellard19cb3732006-08-19 11:45:59 +00002109 if (!drv)
2110 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002111
2112 if (!bs->backing_hd) {
2113 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002114 }
2115
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002116 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2117 return -EBUSY;
2118 }
2119
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002120 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002121 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2122 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002123 open_flags = bs->backing_hd->open_flags;
2124
2125 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002126 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2127 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002128 }
bellard33e39632003-07-06 17:15:21 +00002129 }
bellardea2384d2004-08-01 21:59:26 +00002130
Jeff Cody72706ea2014-01-24 09:02:35 -05002131 length = bdrv_getlength(bs);
2132 if (length < 0) {
2133 ret = length;
2134 goto ro_cleanup;
2135 }
2136
2137 backing_length = bdrv_getlength(bs->backing_hd);
2138 if (backing_length < 0) {
2139 ret = backing_length;
2140 goto ro_cleanup;
2141 }
2142
2143 /* If our top snapshot is larger than the backing file image,
2144 * grow the backing file image if possible. If not possible,
2145 * we must return an error */
2146 if (length > backing_length) {
2147 ret = bdrv_truncate(bs->backing_hd, length);
2148 if (ret < 0) {
2149 goto ro_cleanup;
2150 }
2151 }
2152
2153 total_sectors = length >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05002154 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00002155
Kevin Wolf8a426612010-07-16 17:17:01 +02002156 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002157 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2158 if (ret < 0) {
2159 goto ro_cleanup;
2160 }
2161 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002162 ret = bdrv_read(bs, sector, buf, n);
2163 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002164 goto ro_cleanup;
2165 }
2166
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002167 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2168 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002169 goto ro_cleanup;
2170 }
bellardea2384d2004-08-01 21:59:26 +00002171 }
2172 }
bellard95389c82005-12-18 18:28:15 +00002173
Christoph Hellwig1d449522010-01-17 12:32:30 +01002174 if (drv->bdrv_make_empty) {
2175 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002176 if (ret < 0) {
2177 goto ro_cleanup;
2178 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002179 bdrv_flush(bs);
2180 }
bellard95389c82005-12-18 18:28:15 +00002181
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002182 /*
2183 * Make sure all data we wrote to the backing device is actually
2184 * stable on disk.
2185 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002186 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002187 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002188 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002189
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002190 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002191ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05002192 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002193
2194 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002195 /* ignoring error return here */
2196 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002197 }
2198
Christoph Hellwig1d449522010-01-17 12:32:30 +01002199 return ret;
bellard33e39632003-07-06 17:15:21 +00002200}
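
/*
 * Illustrative usage sketch, not part of the original file: committing a
 * device's COW overlay into its backing file, treating "no backing file"
 * as a no-op just like bdrv_commit_all() below.
 */
static int example_commit_device(BlockDriverState *bs)
{
    int ret = bdrv_commit(bs);

    if (ret == -ENOTSUP) {
        /* No backing file, nothing to commit */
        return 0;
    }
    /* 0 on success; -ENOMEDIUM, -EBUSY, -EACCES, ... on failure */
    return ret;
}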
2201
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002202int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002203{
2204 BlockDriverState *bs;
2205
Benoît Canetdc364f42014-01-23 21:31:32 +01002206 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Jeff Cody272d2d82013-02-26 09:55:48 -05002207 if (bs->drv && bs->backing_hd) {
2208 int ret = bdrv_commit(bs);
2209 if (ret < 0) {
2210 return ret;
2211 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002212 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002213 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002214 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002215}
2216
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002217/**
2218 * Remove an active request from the tracked requests list
2219 *
2220 * This function should be called when a tracked request is completing.
2221 */
2222static void tracked_request_end(BdrvTrackedRequest *req)
2223{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002224 if (req->serialising) {
2225 req->bs->serialising_in_flight--;
2226 }
2227
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002228 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002229 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002230}
2231
2232/**
2233 * Add an active request to the tracked requests list
2234 */
2235static void tracked_request_begin(BdrvTrackedRequest *req,
2236 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002237 int64_t offset,
2238 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002239{
2240 *req = (BdrvTrackedRequest){
2241 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002242 .offset = offset,
2243 .bytes = bytes,
2244 .is_write = is_write,
2245 .co = qemu_coroutine_self(),
2246 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002247 .overlap_offset = offset,
2248 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002249 };
2250
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002251 qemu_co_queue_init(&req->wait_queue);
2252
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002253 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2254}
2255
Kevin Wolfe96126f2014-02-08 10:42:18 +01002256static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002257{
Kevin Wolf73271452013-12-04 17:08:50 +01002258 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002259 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2260 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002261
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002262 if (!req->serialising) {
2263 req->bs->serialising_in_flight++;
2264 req->serialising = true;
2265 }
Kevin Wolf73271452013-12-04 17:08:50 +01002266
2267 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2268 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002269}
2270
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002271/**
2272 * Round a region to cluster boundaries
2273 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002274void bdrv_round_to_clusters(BlockDriverState *bs,
2275 int64_t sector_num, int nb_sectors,
2276 int64_t *cluster_sector_num,
2277 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002278{
2279 BlockDriverInfo bdi;
2280
2281 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2282 *cluster_sector_num = sector_num;
2283 *cluster_nb_sectors = nb_sectors;
2284 } else {
2285 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2286 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2287 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2288 nb_sectors, c);
2289 }
2290}
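
/*
 * Illustrative usage sketch, not part of the original file: widening a guest
 * request to cluster boundaries, as copy-on-read style code does before
 * copying whole clusters.
 */
static void example_widen_request(BlockDriverState *bs,
                                  int64_t sector_num, int nb_sectors)
{
    int64_t cluster_sector_num;
    int cluster_nb_sectors;

    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);
    /* The rounded region starts at or before sector_num and covers the whole
     * original request; without cluster info it is returned unchanged. */
}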
2291
Kevin Wolf73271452013-12-04 17:08:50 +01002292static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002293{
2294 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002295 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002296
Kevin Wolf73271452013-12-04 17:08:50 +01002297 ret = bdrv_get_info(bs, &bdi);
2298 if (ret < 0 || bdi.cluster_size == 0) {
2299 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002300 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002301 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002302 }
2303}
2304
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002305static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002306 int64_t offset, unsigned int bytes)
2307{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002308 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002309 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002310 return false;
2311 }
2312 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002313 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002314 return false;
2315 }
2316 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002317}
2318
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002319static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002320{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002321 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002322 BdrvTrackedRequest *req;
2323 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002324 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002325
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002326 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002327 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002328 }
2329
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002330 do {
2331 retry = false;
2332 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002333 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002334 continue;
2335 }
Kevin Wolf73271452013-12-04 17:08:50 +01002336 if (tracked_request_overlaps(req, self->overlap_offset,
2337 self->overlap_bytes))
2338 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002339 /* Hitting this means there was a reentrant request, for
2340 * example, a block driver issuing nested requests. This must
2341 * never happen since it means deadlock.
2342 */
2343 assert(qemu_coroutine_self() != req->co);
2344
Kevin Wolf64604402013-12-13 13:04:35 +01002345 /* If the request is already (indirectly) waiting for us, or
2346 * will wait for us as soon as it wakes up, then just go on
2347 * (instead of producing a deadlock in the former case). */
2348 if (!req->waiting_for) {
2349 self->waiting_for = req;
2350 qemu_co_queue_wait(&req->wait_queue);
2351 self->waiting_for = NULL;
2352 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002353 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002354 break;
2355 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002356 }
2357 }
2358 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002359
2360 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002361}
2362
Kevin Wolf756e6732010-01-12 12:55:17 +01002363/*
2364 * Return values:
2365 * 0 - success
2366 * -EINVAL - backing format specified, but no file
2367 * -ENOSPC - can't update the backing file because no space is left in the
2368 * image file header
2369 * -ENOTSUP - format driver doesn't support changing the backing file
2370 */
2371int bdrv_change_backing_file(BlockDriverState *bs,
2372 const char *backing_file, const char *backing_fmt)
2373{
2374 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002375 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002376
Paolo Bonzini5f377792012-04-12 14:01:01 +02002377 /* Backing file format doesn't make sense without a backing file */
2378 if (backing_fmt && !backing_file) {
2379 return -EINVAL;
2380 }
2381
Kevin Wolf756e6732010-01-12 12:55:17 +01002382 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002383 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002384 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002385 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002386 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002387
2388 if (ret == 0) {
2389 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2390 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2391 }
2392 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002393}
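
/*
 * Illustrative usage sketch, not part of the original file: rewriting the
 * backing file string recorded in an image header, e.g. after the backing
 * file has been moved.  new_backing/new_fmt are caller-provided.
 */
static int example_rewrite_backing(BlockDriverState *bs,
                                   const char *new_backing,
                                   const char *new_fmt)
{
    /* -ENOTSUP: the driver cannot rewrite its header;
     * -ENOSPC:  no room in the header for the longer backing file name */
    return bdrv_change_backing_file(bs, new_backing, new_fmt);
}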
2394
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002395/*
2396 * Finds the image layer in the chain that has 'bs' as its backing file.
2397 *
2398 * active is the current topmost image.
2399 *
2400 * Returns NULL if bs is not found in active's image chain,
2401 * or if active == bs.
2402 */
2403BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2404 BlockDriverState *bs)
2405{
2406 BlockDriverState *overlay = NULL;
2407 BlockDriverState *intermediate;
2408
2409 assert(active != NULL);
2410 assert(bs != NULL);
2411
2412 /* if bs is the same as active, then by definition it has no overlay
2413 */
2414 if (active == bs) {
2415 return NULL;
2416 }
2417
2418 intermediate = active;
2419 while (intermediate->backing_hd) {
2420 if (intermediate->backing_hd == bs) {
2421 overlay = intermediate;
2422 break;
2423 }
2424 intermediate = intermediate->backing_hd;
2425 }
2426
2427 return overlay;
2428}
2429
2430typedef struct BlkIntermediateStates {
2431 BlockDriverState *bs;
2432 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2433} BlkIntermediateStates;
2434
2435
2436/*
2437 * Drops images above 'base' up to and including 'top', and sets the image
2438 * above 'top' to have base as its backing file.
2439 *
2440 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2441 * information in 'bs' can be properly updated.
2442 *
2443 * E.g., this will convert the following chain:
2444 * bottom <- base <- intermediate <- top <- active
2445 *
2446 * to
2447 *
2448 * bottom <- base <- active
2449 *
2450 * It is allowed for bottom==base, in which case it converts:
2451 *
2452 * base <- intermediate <- top <- active
2453 *
2454 * to
2455 *
2456 * base <- active
2457 *
2458 * Error conditions:
2459 * if active == top, that is considered an error
2460 *
2461 */
2462int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2463 BlockDriverState *base)
2464{
2465 BlockDriverState *intermediate;
2466 BlockDriverState *base_bs = NULL;
2467 BlockDriverState *new_top_bs = NULL;
2468 BlkIntermediateStates *intermediate_state, *next;
2469 int ret = -EIO;
2470
2471 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2472 QSIMPLEQ_INIT(&states_to_delete);
2473
2474 if (!top->drv || !base->drv) {
2475 goto exit;
2476 }
2477
2478 new_top_bs = bdrv_find_overlay(active, top);
2479
2480 if (new_top_bs == NULL) {
2481 /* we could not find the image above 'top', this is an error */
2482 goto exit;
2483 }
2484
2485 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2486 * to do, no intermediate images */
2487 if (new_top_bs->backing_hd == base) {
2488 ret = 0;
2489 goto exit;
2490 }
2491
2492 intermediate = top;
2493
2494 /* now we will go down through the list, and add each BDS we find
2495 * into our deletion queue, until we hit the 'base'
2496 */
2497 while (intermediate) {
2498 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2499 intermediate_state->bs = intermediate;
2500 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2501
2502 if (intermediate->backing_hd == base) {
2503 base_bs = intermediate->backing_hd;
2504 break;
2505 }
2506 intermediate = intermediate->backing_hd;
2507 }
2508 if (base_bs == NULL) {
2509 /* something went wrong, we did not end at the base. safely
2510 * unravel everything, and exit with error */
2511 goto exit;
2512 }
2513
2514 /* success - we can delete the intermediate states, and link top->base */
2515 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2516 base_bs->drv ? base_bs->drv->format_name : "");
2517 if (ret) {
2518 goto exit;
2519 }
2520 new_top_bs->backing_hd = base_bs;
2521
Kevin Wolf355ef4a2013-12-11 20:14:09 +01002522 bdrv_refresh_limits(new_top_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002523
2524 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2525 /* so that bdrv_close() does not recursively close the chain */
2526 intermediate_state->bs->backing_hd = NULL;
Fam Zheng4f6fd342013-08-23 09:14:47 +08002527 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002528 }
2529 ret = 0;
2530
2531exit:
2532 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2533 g_free(intermediate_state);
2534 }
2535 return ret;
2536}
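
/*
 * Illustrative usage sketch, not part of the original file: collapsing
 * "base <- mid <- top <- active" into "base <- active" after a block-commit
 * style operation.  The three pointers are assumed to belong to one chain.
 */
static int example_drop_middle(BlockDriverState *active, BlockDriverState *top,
                               BlockDriverState *base, Error **errp)
{
    int ret = bdrv_drop_intermediate(active, top, base);
    if (ret < 0) {
        error_setg(errp, "Failed to drop intermediate images");
    }
    return ret;
}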
2537
2538
aliguori71d07702009-03-03 17:37:16 +00002539static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2540 size_t size)
2541{
2542 int64_t len;
2543
2544 if (!bdrv_is_inserted(bs))
2545 return -ENOMEDIUM;
2546
2547 if (bs->growable)
2548 return 0;
2549
2550 len = bdrv_getlength(bs);
2551
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002552 if (offset < 0)
2553 return -EIO;
2554
2555 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002556 return -EIO;
2557
2558 return 0;
2559}
2560
2561static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2562 int nb_sectors)
2563{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002564 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2565 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002566}
2567
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002568typedef struct RwCo {
2569 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002570 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002571 QEMUIOVector *qiov;
2572 bool is_write;
2573 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002574 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002575} RwCo;
2576
2577static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2578{
2579 RwCo *rwco = opaque;
2580
2581 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002582 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2583 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002584 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002585 } else {
2586 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2587 rwco->qiov->size, rwco->qiov,
2588 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002589 }
2590}
2591
2592/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002593 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002594 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002595static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2596 QEMUIOVector *qiov, bool is_write,
2597 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002598{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002599 Coroutine *co;
2600 RwCo rwco = {
2601 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002602 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002603 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002604 .is_write = is_write,
2605 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002606 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002607 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002608
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002609 /**
2610 * In a synchronous call context the vcpu is blocked, so this throttling
2611 * timer would never fire; therefore I/O throttling has to be disabled here
2612 * if it has been enabled.
2613 */
2614 if (bs->io_limits_enabled) {
2615 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2616 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2617 bdrv_io_limits_disable(bs);
2618 }
2619
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002620 if (qemu_in_coroutine()) {
2621 /* Fast-path if already in coroutine context */
2622 bdrv_rw_co_entry(&rwco);
2623 } else {
2624 co = qemu_coroutine_create(bdrv_rw_co_entry);
2625 qemu_coroutine_enter(co, &rwco);
2626 while (rwco.ret == NOT_DONE) {
2627 qemu_aio_wait();
2628 }
2629 }
2630 return rwco.ret;
2631}
2632
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002633/*
2634 * Process a synchronous request using coroutines
2635 */
2636static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002637 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002638{
2639 QEMUIOVector qiov;
2640 struct iovec iov = {
2641 .iov_base = (void *)buf,
2642 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2643 };
2644
2645 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002646 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2647 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002648}
2649
bellard19cb3732006-08-19 11:45:59 +00002650/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002651int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002652 uint8_t *buf, int nb_sectors)
2653{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002654 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002655}
2656
Markus Armbruster07d27a42012-06-29 17:34:29 +02002657/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2658int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2659 uint8_t *buf, int nb_sectors)
2660{
2661 bool enabled;
2662 int ret;
2663
2664 enabled = bs->io_limits_enabled;
2665 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002666 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002667 bs->io_limits_enabled = enabled;
2668 return ret;
2669}
2670
ths5fafdf22007-09-16 21:08:06 +00002671/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002672 -EIO generic I/O error (may happen for all errors)
2673 -ENOMEDIUM No media inserted.
2674 -EINVAL Invalid sector number or nb_sectors
2675 -EACCES Trying to write a read-only device
2676*/
ths5fafdf22007-09-16 21:08:06 +00002677int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002678 const uint8_t *buf, int nb_sectors)
2679{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002680 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002681}
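
/*
 * Illustrative usage sketch, not part of the original file: synchronous
 * sector-granularity I/O.  The buffer must hold nb_sectors * BDRV_SECTOR_SIZE
 * bytes; both calls return 0 on success and a negative errno on failure.
 */
static int example_copy_first_sectors(BlockDriverState *src,
                                      BlockDriverState *dst, int nb_sectors)
{
    uint8_t *buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
    int ret;

    ret = bdrv_read(src, 0, buf, nb_sectors);
    if (ret >= 0) {
        ret = bdrv_write(dst, 0, buf, nb_sectors);
    }
    g_free(buf);
    return ret;
}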
2682
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002683int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2684 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002685{
2686 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002687 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002688}
2689
Peter Lievend75cbb52013-10-24 12:07:03 +02002690/*
2691 * Completely zero out a block device with the help of bdrv_write_zeroes.
2692 * The operation is sped up by checking the block status and only writing
2693 * zeroes to regions that do not already read back as zeroes. Optional
2694 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2695 *
2696 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2697 */
2698int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2699{
2700 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2701 int64_t ret, nb_sectors, sector_num = 0;
2702 int n;
2703
2704 for (;;) {
2705 nb_sectors = target_size - sector_num;
2706 if (nb_sectors <= 0) {
2707 return 0;
2708 }
2709 if (nb_sectors > INT_MAX) {
2710 nb_sectors = INT_MAX;
2711 }
2712 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002713 if (ret < 0) {
2714 error_report("error getting block status at sector %" PRId64 ": %s",
2715 sector_num, strerror(-ret));
2716 return ret;
2717 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002718 if (ret & BDRV_BLOCK_ZERO) {
2719 sector_num += n;
2720 continue;
2721 }
2722 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2723 if (ret < 0) {
2724 error_report("error writing zeroes at sector %" PRId64 ": %s",
2725 sector_num, strerror(-ret));
2726 return ret;
2727 }
2728 sector_num += n;
2729 }
2730}
2731
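/*
 * Illustrative example (added, not part of the original file): zeroing
 * with the helpers above.  The flag choice is only an assumption; callers
 * pick the BdrvRequestFlags that fit their use case:
 *
 *     // zero the first 2048 sectors, allowing the driver to unmap them
 *     ret = bdrv_write_zeroes(bs, 0, 2048, BDRV_REQ_MAY_UNMAP);
 *
 *     // or zero the whole device, skipping ranges that already read as zero
 *     ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
 */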
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002732int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002733{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002734 QEMUIOVector qiov;
2735 struct iovec iov = {
2736 .iov_base = (void *)buf,
2737 .iov_len = bytes,
2738 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002739 int ret;
bellard83f64092006-08-01 16:21:11 +00002740
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002741 if (bytes < 0) {
2742 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002743 }
2744
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002745 qemu_iovec_init_external(&qiov, &iov, 1);
2746 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2747 if (ret < 0) {
2748 return ret;
bellard83f64092006-08-01 16:21:11 +00002749 }
2750
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002751 return bytes;
bellard83f64092006-08-01 16:21:11 +00002752}
2753
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002754int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002755{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002756 int ret;
bellard83f64092006-08-01 16:21:11 +00002757
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002758 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2759 if (ret < 0) {
2760 return ret;
bellard83f64092006-08-01 16:21:11 +00002761 }
2762
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002763 return qiov->size;
2764}
2765
2766int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002767 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002768{
2769 QEMUIOVector qiov;
2770 struct iovec iov = {
2771 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002772 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002773 };
2774
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002775 if (bytes < 0) {
2776 return -EINVAL;
2777 }
2778
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002779 qemu_iovec_init_external(&qiov, &iov, 1);
2780 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002781}
bellard83f64092006-08-01 16:21:11 +00002782
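/*
 * Illustrative example (added, not part of the original file): byte-
 * granularity access with bdrv_pread()/bdrv_pwrite().  Both return the
 * number of bytes transferred on success and a negative errno on failure.
 * MyHeader and its 'dirty' field are made up for the sketch:
 *
 *     MyHeader header;
 *     int ret;
 *
 *     ret = bdrv_pread(bs, 0, &header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     header.dirty = 1;
 *     ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
 */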
Kevin Wolff08145f2010-06-16 16:38:15 +02002783/*
2784 * Writes to the file and ensures that no writes are reordered across this
2785 * request (acts as a barrier)
2786 *
2787 * Returns 0 on success, -errno in error cases.
2788 */
2789int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2790 const void *buf, int count)
2791{
2792 int ret;
2793
2794 ret = bdrv_pwrite(bs, offset, buf, count);
2795 if (ret < 0) {
2796 return ret;
2797 }
2798
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002799 /* No flush needed for cache modes that already do it */
2800 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002801 bdrv_flush(bs);
2802 }
2803
2804 return 0;
2805}
2806
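/*
 * Illustrative example (added, not part of the original file): a format
 * driver would use bdrv_pwrite_sync() when a metadata update must not be
 * reordered behind later writes.  The refcount block names and sizes are
 * hypothetical, loosely modelled on what an image format might keep:
 *
 *     ret = bdrv_pwrite_sync(bs->file, refcount_block_offset,
 *                            refcount_block, cluster_size);
 *     if (ret < 0) {
 *         return ret;          // metadata may not be durable, abort update
 *     }
 */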
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002807static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002808 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2809{
2810 /* Perform I/O through a temporary buffer so that users who scribble over
2811 * their read buffer while the operation is in progress do not end up
2812 * modifying the image file. This is critical for zero-copy guest I/O
2813 * where anything might happen inside guest memory.
2814 */
2815 void *bounce_buffer;
2816
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002817 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002818 struct iovec iov;
2819 QEMUIOVector bounce_qiov;
2820 int64_t cluster_sector_num;
2821 int cluster_nb_sectors;
2822 size_t skip_bytes;
2823 int ret;
2824
2825 /* Cover entire cluster so no additional backing file I/O is required when
2826 * allocating the cluster in the image file.
2827 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002828 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2829 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002830
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002831 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2832 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002833
2834 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2835 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2836 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2837
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002838 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2839 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002840 if (ret < 0) {
2841 goto err;
2842 }
2843
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002844 if (drv->bdrv_co_write_zeroes &&
2845 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002846 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002847 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002848 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002849 /* This does not change the data on the disk, it is not necessary
2850 * to flush even in cache=writethrough mode.
2851 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002852 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002853 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002854 }
2855
Stefan Hajnocziab185922011-11-17 13:40:31 +00002856 if (ret < 0) {
2857 /* It might be okay to ignore write errors for guest requests. If this
2858 * is a deliberate copy-on-read then we don't want to ignore the error.
2859 * Simply report it in all cases.
2860 */
2861 goto err;
2862 }
2863
2864 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002865 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2866 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002867
2868err:
2869 qemu_vfree(bounce_buffer);
2870 return ret;
2871}
2872
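/*
 * Worked example for the rounding above (added note, not in the original
 * file): with a 64 KiB cluster (128 sectors), a guest read of 4 sectors at
 * sector_num 130 is widened to cluster_sector_num 128 and
 * cluster_nb_sectors 128.  The bounce buffer then holds the whole cluster,
 * and skip_bytes = (130 - 128) * BDRV_SECTOR_SIZE = 1024 locates the data
 * the guest actually asked for inside it.
 */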
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002873/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002874 * Forwards an already correctly aligned request to the BlockDriver. This
2875 * handles copy on read and zeroing after EOF; any other features must be
2876 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002877 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002878static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01002879 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01002880 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002881{
2882 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002883 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002884
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002885 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2886 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002887
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002888 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2889 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2890
2891 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002892 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01002893 /* If we touch the same cluster it counts as an overlap. This
2894 * guarantees that allocating writes will be serialized and not race
2895 * with each other for the same cluster. For example, in copy-on-read
2896 * it ensures that the CoR read and write operations are atomic and
2897 * guest writes cannot interleave between them. */
2898 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002899 }
2900
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002901 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002902
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002903 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002904 int pnum;
2905
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02002906 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002907 if (ret < 0) {
2908 goto out;
2909 }
2910
2911 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002912 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002913 goto out;
2914 }
2915 }
2916
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002917 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002918 if (!(bs->zero_beyond_eof && bs->growable)) {
2919 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2920 } else {
2921 /* Read zeros after EOF of growable BDSes */
2922 int64_t len, total_sectors, max_nb_sectors;
2923
2924 len = bdrv_getlength(bs);
2925 if (len < 0) {
2926 ret = len;
2927 goto out;
2928 }
2929
Fam Zhengd055a1f2013-09-26 19:55:33 +08002930 total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01002931 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2932 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002933 if (max_nb_sectors > 0) {
2934 ret = drv->bdrv_co_readv(bs, sector_num,
2935 MIN(nb_sectors, max_nb_sectors), qiov);
2936 } else {
2937 ret = 0;
2938 }
2939
2940 /* Reading beyond end of file is supposed to produce zeroes */
2941 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
2942 uint64_t offset = MAX(0, total_sectors - sector_num);
2943 uint64_t bytes = (sector_num + nb_sectors - offset) *
2944 BDRV_SECTOR_SIZE;
2945 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2946 }
2947 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00002948
2949out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002950 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002951}
2952
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002953/*
2954 * Handle a read request in coroutine context
2955 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002956static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
2957 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002958 BdrvRequestFlags flags)
2959{
2960 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01002961 BdrvTrackedRequest req;
2962
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002963 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
2964 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
2965 uint8_t *head_buf = NULL;
2966 uint8_t *tail_buf = NULL;
2967 QEMUIOVector local_qiov;
2968 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002969 int ret;
2970
2971 if (!drv) {
2972 return -ENOMEDIUM;
2973 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002974 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002975 return -EIO;
2976 }
2977
2978 if (bs->copy_on_read) {
2979 flags |= BDRV_REQ_COPY_ON_READ;
2980 }
2981
2982 /* throttling disk I/O */
2983 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01002984 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002985 }
2986
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002987 /* Align read if necessary by padding qiov */
2988 if (offset & (align - 1)) {
2989 head_buf = qemu_blockalign(bs, align);
2990 qemu_iovec_init(&local_qiov, qiov->niov + 2);
2991 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
2992 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
2993 use_local_qiov = true;
2994
2995 bytes += offset & (align - 1);
2996 offset = offset & ~(align - 1);
2997 }
2998
2999 if ((offset + bytes) & (align - 1)) {
3000 if (!use_local_qiov) {
3001 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3002 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3003 use_local_qiov = true;
3004 }
3005 tail_buf = qemu_blockalign(bs, align);
3006 qemu_iovec_add(&local_qiov, tail_buf,
3007 align - ((offset + bytes) & (align - 1)));
3008
3009 bytes = ROUND_UP(bytes, align);
3010 }
3011
Kevin Wolf65afd212013-12-03 14:55:55 +01003012 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003013 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003014 use_local_qiov ? &local_qiov : qiov,
3015 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003016 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003017
3018 if (use_local_qiov) {
3019 qemu_iovec_destroy(&local_qiov);
3020 qemu_vfree(head_buf);
3021 qemu_vfree(tail_buf);
3022 }
3023
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003024 return ret;
3025}
3026
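/*
 * Worked example for the padding above (added note, not in the original
 * file): with a 4096-byte request_alignment, a 1024-byte read at offset
 * 4000 becomes offset 0 and bytes 5024 after the head padding
 * (4000 & ~4095 == 0, with the 4000 head bytes read into head_buf), and
 * then bytes = ROUND_UP(5024, 4096) = 8192 after the tail padding into
 * tail_buf.  The caller's qiov still only receives the 1024 bytes that
 * were requested.
 */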
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003027static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3028 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3029 BdrvRequestFlags flags)
3030{
3031 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3032 return -EINVAL;
3033 }
3034
3035 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3036 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3037}
3038
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003039int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003040 int nb_sectors, QEMUIOVector *qiov)
3041{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003042 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003043
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003044 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3045}
3046
3047int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3048 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3049{
3050 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3051
3052 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3053 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003054}
3055
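/*
 * Illustrative example (added, not part of the original file): a caller
 * that is already running in coroutine context reads two sectors into a
 * scatter/gather list.  buf_a and buf_b are made-up, sector-sized buffers:
 *
 *     QEMUIOVector qiov;
 *     struct iovec iov[2] = {
 *         { .iov_base = buf_a, .iov_len = BDRV_SECTOR_SIZE },
 *         { .iov_base = buf_b, .iov_len = BDRV_SECTOR_SIZE },
 *     };
 *     int ret;
 *
 *     qemu_iovec_init_external(&qiov, iov, 2);
 *     ret = bdrv_co_readv(bs, sector_num, 2, &qiov);
 */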
Peter Lievenc31cb702013-10-24 12:06:58 +02003056/* If no limit is specified in the BlockLimits, use a default
3057 * of 32768 512-byte sectors (16 MiB) per request.
3058 */
3059#define MAX_WRITE_ZEROES_DEFAULT 32768
3060
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003061static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003062 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003063{
3064 BlockDriver *drv = bs->drv;
3065 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003066 struct iovec iov = {0};
3067 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003068
Peter Lievenc31cb702013-10-24 12:06:58 +02003069 int max_write_zeroes = bs->bl.max_write_zeroes ?
3070 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003071
Peter Lievenc31cb702013-10-24 12:06:58 +02003072 while (nb_sectors > 0 && !ret) {
3073 int num = nb_sectors;
3074
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003075 /* Align request. Block drivers can expect the "bulk" of the request
3076 * to be aligned.
3077 */
3078 if (bs->bl.write_zeroes_alignment
3079 && num > bs->bl.write_zeroes_alignment) {
3080 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3081 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003082 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003083 num -= sector_num % bs->bl.write_zeroes_alignment;
3084 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3085 /* Shorten the request to the last aligned sector. num cannot
3086 * underflow because num > bs->bl.write_zeroes_alignment.
3087 */
3088 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003089 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003090 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003091
3092 /* limit request size */
3093 if (num > max_write_zeroes) {
3094 num = max_write_zeroes;
3095 }
3096
3097 ret = -ENOTSUP;
3098 /* First try the efficient write zeroes operation */
3099 if (drv->bdrv_co_write_zeroes) {
3100 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3101 }
3102
3103 if (ret == -ENOTSUP) {
3104 /* Fall back to bounce buffer if write zeroes is unsupported */
3105 iov.iov_len = num * BDRV_SECTOR_SIZE;
3106 if (iov.iov_base == NULL) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003107 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3108 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003109 }
3110 qemu_iovec_init_external(&qiov, &iov, 1);
3111
3112 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003113
3114 /* Keep the bounce buffer around if it is big enough for
3115 * all future requests.
3116 */
3117 if (num < max_write_zeroes) {
3118 qemu_vfree(iov.iov_base);
3119 iov.iov_base = NULL;
3120 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003121 }
3122
3123 sector_num += num;
3124 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003125 }
3126
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003127 qemu_vfree(iov.iov_base);
3128 return ret;
3129}
3130
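/*
 * Worked example for the splitting above (added note, not in the original
 * file): with a write_zeroes_alignment of 8 sectors, a 20-sector request
 * at sector_num 3 is issued as three driver calls: 5 sectors (3..7) up to
 * the first aligned boundary, 8 aligned sectors (8..15), and the remaining
 * 7 sectors (16..22).
 */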
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003131/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003132 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003133 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003134static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003135 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3136 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003137{
3138 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003139 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003140 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003141
Kevin Wolfb404f722013-12-03 14:02:23 +01003142 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3143 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003144
Kevin Wolfb404f722013-12-03 14:02:23 +01003145 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3146 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003147
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003148 waited = wait_serialising_requests(req);
3149 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003150 assert(req->overlap_offset <= offset);
3151 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003152
Kevin Wolf65afd212013-12-03 14:55:55 +01003153 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003154
3155 if (ret < 0) {
3156 /* Do nothing, write notifier decided to fail this request */
3157 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003158 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003159 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003160 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003161 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003162 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3163 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003164 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003165
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003166 if (ret == 0 && !bs->enable_write_cache) {
3167 ret = bdrv_co_flush(bs);
3168 }
3169
Fam Zhenge4654d22013-11-13 18:29:43 +08003170 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003171
3172 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3173 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3174 }
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003175 if (bs->growable && ret >= 0) {
3176 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3177 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003178
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003179 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003180}
3181
Kevin Wolfb404f722013-12-03 14:02:23 +01003182/*
3183 * Handle a write request in coroutine context
3184 */
Kevin Wolf66015532013-12-03 14:40:18 +01003185static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3186 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003187 BdrvRequestFlags flags)
3188{
Kevin Wolf65afd212013-12-03 14:55:55 +01003189 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003190 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3191 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3192 uint8_t *head_buf = NULL;
3193 uint8_t *tail_buf = NULL;
3194 QEMUIOVector local_qiov;
3195 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003196 int ret;
3197
3198 if (!bs->drv) {
3199 return -ENOMEDIUM;
3200 }
3201 if (bs->read_only) {
3202 return -EACCES;
3203 }
Kevin Wolf66015532013-12-03 14:40:18 +01003204 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003205 return -EIO;
3206 }
3207
Kevin Wolfb404f722013-12-03 14:02:23 +01003208 /* throttling disk I/O */
3209 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003210 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003211 }
3212
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003213 /*
3214 * Align write if necessary by performing a read-modify-write cycle.
3215 * Pad qiov with the read parts and be sure to have a tracked request not
3216 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3217 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003218 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003219
3220 if (offset & (align - 1)) {
3221 QEMUIOVector head_qiov;
3222 struct iovec head_iov;
3223
3224 mark_request_serialising(&req, align);
3225 wait_serialising_requests(&req);
3226
3227 head_buf = qemu_blockalign(bs, align);
3228 head_iov = (struct iovec) {
3229 .iov_base = head_buf,
3230 .iov_len = align,
3231 };
3232 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3233
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003234 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003235 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3236 align, &head_qiov, 0);
3237 if (ret < 0) {
3238 goto fail;
3239 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003240 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003241
3242 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3243 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3244 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3245 use_local_qiov = true;
3246
3247 bytes += offset & (align - 1);
3248 offset = offset & ~(align - 1);
3249 }
3250
3251 if ((offset + bytes) & (align - 1)) {
3252 QEMUIOVector tail_qiov;
3253 struct iovec tail_iov;
3254 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003255 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003256
3257 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003258 waited = wait_serialising_requests(&req);
3259 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003260
3261 tail_buf = qemu_blockalign(bs, align);
3262 tail_iov = (struct iovec) {
3263 .iov_base = tail_buf,
3264 .iov_len = align,
3265 };
3266 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3267
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003268 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003269 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3270 align, &tail_qiov, 0);
3271 if (ret < 0) {
3272 goto fail;
3273 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003274 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003275
3276 if (!use_local_qiov) {
3277 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3278 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3279 use_local_qiov = true;
3280 }
3281
3282 tail_bytes = (offset + bytes) & (align - 1);
3283 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3284
3285 bytes = ROUND_UP(bytes, align);
3286 }
3287
3288 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3289 use_local_qiov ? &local_qiov : qiov,
3290 flags);
3291
3292fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003293 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003294
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003295 if (use_local_qiov) {
3296 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003297 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003298 qemu_vfree(head_buf);
3299 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003300
Kevin Wolfb404f722013-12-03 14:02:23 +01003301 return ret;
3302}
3303
Kevin Wolf66015532013-12-03 14:40:18 +01003304static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3305 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3306 BdrvRequestFlags flags)
3307{
3308 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3309 return -EINVAL;
3310 }
3311
3312 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3313 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3314}
3315
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003316int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3317 int nb_sectors, QEMUIOVector *qiov)
3318{
3319 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3320
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003321 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3322}
3323
3324int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003325 int64_t sector_num, int nb_sectors,
3326 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003327{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003328 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003329
Peter Lievend32f35c2013-10-24 12:06:52 +02003330 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3331 flags &= ~BDRV_REQ_MAY_UNMAP;
3332 }
3333
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003334 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003335 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003336}
3337
bellard83f64092006-08-01 16:21:11 +00003338/**
bellard83f64092006-08-01 16:21:11 +00003339 * Truncate file to 'offset' bytes (needed only for file protocols)
3340 */
3341int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3342{
3343 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003344 int ret;
bellard83f64092006-08-01 16:21:11 +00003345 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003346 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003347 if (!drv->bdrv_truncate)
3348 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003349 if (bs->read_only)
3350 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02003351 if (bdrv_in_use(bs))
3352 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003353 ret = drv->bdrv_truncate(bs, offset);
3354 if (ret == 0) {
3355 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003356 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003357 }
3358 return ret;
bellard83f64092006-08-01 16:21:11 +00003359}
3360
3361/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003362 * Length of an allocated file in bytes. Sparse files are counted by actual
3363 * allocated space. Return < 0 if error or unknown.
3364 */
3365int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3366{
3367 BlockDriver *drv = bs->drv;
3368 if (!drv) {
3369 return -ENOMEDIUM;
3370 }
3371 if (drv->bdrv_get_allocated_file_size) {
3372 return drv->bdrv_get_allocated_file_size(bs);
3373 }
3374 if (bs->file) {
3375 return bdrv_get_allocated_file_size(bs->file);
3376 }
3377 return -ENOTSUP;
3378}
3379
3380/**
bellard83f64092006-08-01 16:21:11 +00003381 * Length of a file in bytes. Return < 0 if error or unknown.
3382 */
3383int64_t bdrv_getlength(BlockDriverState *bs)
3384{
3385 BlockDriver *drv = bs->drv;
3386 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003387 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003388
Kevin Wolfb94a2612013-10-29 12:18:58 +01003389 if (drv->has_variable_length) {
3390 int ret = refresh_total_sectors(bs, bs->total_sectors);
3391 if (ret < 0) {
3392 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003393 }
bellard83f64092006-08-01 16:21:11 +00003394 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003395 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003396}
3397
bellard19cb3732006-08-19 11:45:59 +00003398/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003399void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003400{
bellard19cb3732006-08-19 11:45:59 +00003401 int64_t length;
3402 length = bdrv_getlength(bs);
3403 if (length < 0)
3404 length = 0;
3405 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01003406 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00003407 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00003408}
bellardcf989512004-02-16 21:56:36 +00003409
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003410void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3411 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003412{
3413 bs->on_read_error = on_read_error;
3414 bs->on_write_error = on_write_error;
3415}
3416
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003417BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003418{
3419 return is_read ? bs->on_read_error : bs->on_write_error;
3420}
3421
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003422BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3423{
3424 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3425
3426 switch (on_err) {
3427 case BLOCKDEV_ON_ERROR_ENOSPC:
3428 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3429 case BLOCKDEV_ON_ERROR_STOP:
3430 return BDRV_ACTION_STOP;
3431 case BLOCKDEV_ON_ERROR_REPORT:
3432 return BDRV_ACTION_REPORT;
3433 case BLOCKDEV_ON_ERROR_IGNORE:
3434 return BDRV_ACTION_IGNORE;
3435 default:
3436 abort();
3437 }
3438}
3439
3440/* This is done by device models because, while the block layer knows
3441 * about the error, it does not know whether an operation comes from
3442 * the device or the block layer (from a job, for example).
3443 */
3444void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3445 bool is_read, int error)
3446{
3447 assert(error >= 0);
Paolo Bonzini32c81a42012-09-28 17:22:58 +02003448 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003449 if (action == BDRV_ACTION_STOP) {
3450 vm_stop(RUN_STATE_IO_ERROR);
3451 bdrv_iostatus_set_err(bs, error);
3452 }
3453}
3454
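/*
 * Illustrative example (added, not part of the original file): a device
 * model typically combines the two helpers above when a request fails;
 * 'error' is a positive errno and 'is_read' gives the direction:
 *
 *     BlockErrorAction action = bdrv_get_error_action(bs, is_read, error);
 *
 *     if (action == BDRV_ACTION_REPORT) {
 *         // complete the guest request with an error status
 *     }
 *     bdrv_error_action(bs, action, is_read, error);
 *     // emits the QMP event and, for BDRV_ACTION_STOP, pauses the VM
 */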
bellardb3380822004-03-14 21:38:54 +00003455int bdrv_is_read_only(BlockDriverState *bs)
3456{
3457 return bs->read_only;
3458}
3459
ths985a03b2007-12-24 16:10:43 +00003460int bdrv_is_sg(BlockDriverState *bs)
3461{
3462 return bs->sg;
3463}
3464
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003465int bdrv_enable_write_cache(BlockDriverState *bs)
3466{
3467 return bs->enable_write_cache;
3468}
3469
Paolo Bonzini425b0142012-06-06 00:04:52 +02003470void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3471{
3472 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003473
3474 /* so a reopen() will preserve wce */
3475 if (wce) {
3476 bs->open_flags |= BDRV_O_CACHE_WB;
3477 } else {
3478 bs->open_flags &= ~BDRV_O_CACHE_WB;
3479 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003480}
3481
bellardea2384d2004-08-01 21:59:26 +00003482int bdrv_is_encrypted(BlockDriverState *bs)
3483{
3484 if (bs->backing_hd && bs->backing_hd->encrypted)
3485 return 1;
3486 return bs->encrypted;
3487}
3488
aliguoric0f4ce72009-03-05 23:01:01 +00003489int bdrv_key_required(BlockDriverState *bs)
3490{
3491 BlockDriverState *backing_hd = bs->backing_hd;
3492
3493 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3494 return 1;
3495 return (bs->encrypted && !bs->valid_key);
3496}
3497
bellardea2384d2004-08-01 21:59:26 +00003498int bdrv_set_key(BlockDriverState *bs, const char *key)
3499{
3500 int ret;
3501 if (bs->backing_hd && bs->backing_hd->encrypted) {
3502 ret = bdrv_set_key(bs->backing_hd, key);
3503 if (ret < 0)
3504 return ret;
3505 if (!bs->encrypted)
3506 return 0;
3507 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003508 if (!bs->encrypted) {
3509 return -EINVAL;
3510 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3511 return -ENOMEDIUM;
3512 }
aliguoric0f4ce72009-03-05 23:01:01 +00003513 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003514 if (ret < 0) {
3515 bs->valid_key = 0;
3516 } else if (!bs->valid_key) {
3517 bs->valid_key = 1;
3518 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003519 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003520 }
aliguoric0f4ce72009-03-05 23:01:01 +00003521 return ret;
bellardea2384d2004-08-01 21:59:26 +00003522}
3523
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003524const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003525{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003526 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003527}
3528
ths5fafdf22007-09-16 21:08:06 +00003529void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003530 void *opaque)
3531{
3532 BlockDriver *drv;
3533
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003534 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00003535 it(opaque, drv->format_name);
3536 }
3537}
3538
Benoît Canetdc364f42014-01-23 21:31:32 +01003539/* Find a block backend (BlockDriverState) by its device name */
bellardb3380822004-03-14 21:38:54 +00003540BlockDriverState *bdrv_find(const char *name)
3541{
3542 BlockDriverState *bs;
3543
Benoît Canetdc364f42014-01-23 21:31:32 +01003544 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003545 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003546 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003547 }
bellardb3380822004-03-14 21:38:54 +00003548 }
3549 return NULL;
3550}
3551
Benoît Canetdc364f42014-01-23 21:31:32 +01003552/* Find a named node in the BlockDriverState graph */
3553BlockDriverState *bdrv_find_node(const char *node_name)
3554{
3555 BlockDriverState *bs;
3556
3557 assert(node_name);
3558
3559 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3560 if (!strcmp(node_name, bs->node_name)) {
3561 return bs;
3562 }
3563 }
3564 return NULL;
3565}
3566
Benoît Canetc13163f2014-01-23 21:31:34 +01003567/* Put this QMP function here so it can access the static graph_bdrv_states. */
3568BlockDeviceInfoList *bdrv_named_nodes_list(void)
3569{
3570 BlockDeviceInfoList *list, *entry;
3571 BlockDriverState *bs;
3572
3573 list = NULL;
3574 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3575 entry = g_malloc0(sizeof(*entry));
3576 entry->value = bdrv_block_device_info(bs);
3577 entry->next = list;
3578 list = entry;
3579 }
3580
3581 return list;
3582}
3583
Benoît Canet12d3ba82014-01-23 21:31:35 +01003584BlockDriverState *bdrv_lookup_bs(const char *device,
3585 const char *node_name,
3586 Error **errp)
3587{
3588 BlockDriverState *bs = NULL;
3589
Benoît Canet12d3ba82014-01-23 21:31:35 +01003590 if (device) {
3591 bs = bdrv_find(device);
3592
Benoît Canetdd67fa52014-02-12 17:15:06 +01003593 if (bs) {
3594 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003595 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003596 }
3597
Benoît Canetdd67fa52014-02-12 17:15:06 +01003598 if (node_name) {
3599 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003600
Benoît Canetdd67fa52014-02-12 17:15:06 +01003601 if (bs) {
3602 return bs;
3603 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003604 }
3605
Benoît Canetdd67fa52014-02-12 17:15:06 +01003606 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3607 device ? device : "",
3608 node_name ? node_name : "");
3609 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003610}
3611
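/*
 * Illustrative example (added, not part of the original file): a QMP
 * command handler resolving a user-supplied name with bdrv_lookup_bs().
 * The has_device/has_node_name/device/node_name arguments are made up to
 * stand in for the handler's optional parameters:
 *
 *     Error *local_err = NULL;
 *     BlockDriverState *bs = bdrv_lookup_bs(has_device ? device : NULL,
 *                                           has_node_name ? node_name : NULL,
 *                                           &local_err);
 *     if (!bs) {
 *         error_propagate(errp, local_err);
 *         return;
 *     }
 */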
Markus Armbruster2f399b02010-06-02 18:55:20 +02003612BlockDriverState *bdrv_next(BlockDriverState *bs)
3613{
3614 if (!bs) {
3615 return QTAILQ_FIRST(&bdrv_states);
3616 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003617 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003618}
3619
aliguori51de9762009-03-05 23:00:43 +00003620void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003621{
3622 BlockDriverState *bs;
3623
Benoît Canetdc364f42014-01-23 21:31:32 +01003624 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003625 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003626 }
3627}
3628
bellardea2384d2004-08-01 21:59:26 +00003629const char *bdrv_get_device_name(BlockDriverState *bs)
3630{
3631 return bs->device_name;
3632}
3633
Markus Armbrusterc8433282012-06-05 16:49:24 +02003634int bdrv_get_flags(BlockDriverState *bs)
3635{
3636 return bs->open_flags;
3637}
3638
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003639int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003640{
3641 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003642 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003643
Benoît Canetdc364f42014-01-23 21:31:32 +01003644 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003645 int ret = bdrv_flush(bs);
3646 if (ret < 0 && !result) {
3647 result = ret;
3648 }
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003649 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003650
3651 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003652}
3653
Peter Lieven3ac21622013-06-28 12:47:42 +02003654int bdrv_has_zero_init_1(BlockDriverState *bs)
3655{
3656 return 1;
3657}
3658
Kevin Wolff2feebb2010-04-14 17:30:35 +02003659int bdrv_has_zero_init(BlockDriverState *bs)
3660{
3661 assert(bs->drv);
3662
Paolo Bonzini11212d82013-09-04 19:00:27 +02003663 /* If BS is a copy on write image, it is initialized to
3664 the contents of the base image, which may not be zeroes. */
3665 if (bs->backing_hd) {
3666 return 0;
3667 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003668 if (bs->drv->bdrv_has_zero_init) {
3669 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003670 }
3671
Peter Lieven3ac21622013-06-28 12:47:42 +02003672 /* safe default */
3673 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003674}
3675
Peter Lieven4ce78692013-10-24 12:06:54 +02003676bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3677{
3678 BlockDriverInfo bdi;
3679
3680 if (bs->backing_hd) {
3681 return false;
3682 }
3683
3684 if (bdrv_get_info(bs, &bdi) == 0) {
3685 return bdi.unallocated_blocks_are_zero;
3686 }
3687
3688 return false;
3689}
3690
3691bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3692{
3693 BlockDriverInfo bdi;
3694
3695 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3696 return false;
3697 }
3698
3699 if (bdrv_get_info(bs, &bdi) == 0) {
3700 return bdi.can_write_zeroes_with_unmap;
3701 }
3702
3703 return false;
3704}
3705
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003706typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003707 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01003708 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003709 int64_t sector_num;
3710 int nb_sectors;
3711 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003712 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003713 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003714} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003715
thsf58c7b32008-06-05 21:53:49 +00003716/*
3717 * Returns the allocation status of the specified sectors. Drivers
3718 * not implementing the functionality are assumed to not support backing files,
3719 * hence all their sectors are reported as allocated.
3720 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003721 * If 'sector_num' is beyond the end of the disk image the return value is 0
3722 * and 'pnum' is set to 0.
3723 *
thsf58c7b32008-06-05 21:53:49 +00003724 * 'pnum' is set to the number of sectors (including and immediately following
3725 * the specified sector) that are known to be in the same
3726 * allocated/unallocated state.
3727 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003728 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3729 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003730 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003731static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3732 int64_t sector_num,
3733 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00003734{
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003735 int64_t length;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003736 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003737 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003738
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003739 length = bdrv_getlength(bs);
3740 if (length < 0) {
3741 return length;
3742 }
3743
3744 if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003745 *pnum = 0;
3746 return 0;
3747 }
3748
3749 n = bs->total_sectors - sector_num;
3750 if (n < nb_sectors) {
3751 nb_sectors = n;
3752 }
3753
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003754 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003755 *pnum = nb_sectors;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02003756 ret = BDRV_BLOCK_DATA;
3757 if (bs->drv->protocol_name) {
3758 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3759 }
3760 return ret;
thsf58c7b32008-06-05 21:53:49 +00003761 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003762
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003763 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3764 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02003765 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003766 return ret;
3767 }
3768
Peter Lieven92bc50a2013-10-08 14:43:14 +02003769 if (ret & BDRV_BLOCK_RAW) {
3770 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3771 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3772 *pnum, pnum);
3773 }
3774
Peter Lievenc3d86882013-10-24 12:07:04 +02003775 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3776 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003777 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02003778 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003779 BlockDriverState *bs2 = bs->backing_hd;
3780 int64_t length2 = bdrv_getlength(bs2);
3781 if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3782 ret |= BDRV_BLOCK_ZERO;
3783 }
3784 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003785 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003786
3787 if (bs->file &&
3788 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
3789 (ret & BDRV_BLOCK_OFFSET_VALID)) {
3790 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3791 *pnum, pnum);
3792 if (ret2 >= 0) {
3793 /* Ignore errors. This is just providing extra information, it
3794 * is useful but not necessary.
3795 */
3796 ret |= (ret2 & BDRV_BLOCK_ZERO);
3797 }
3798 }
3799
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003800 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003801}
3802
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003803/* Coroutine wrapper for bdrv_get_block_status() */
3804static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003805{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003806 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003807 BlockDriverState *bs = data->bs;
3808
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003809 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3810 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003811 data->done = true;
3812}
3813
3814/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003815 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003816 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003817 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003818 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003819int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3820 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003821{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003822 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003823 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003824 .bs = bs,
3825 .sector_num = sector_num,
3826 .nb_sectors = nb_sectors,
3827 .pnum = pnum,
3828 .done = false,
3829 };
3830
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003831 if (qemu_in_coroutine()) {
3832 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003833 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003834 } else {
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003835 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003836 qemu_coroutine_enter(co, &data);
3837 while (!data.done) {
3838 qemu_aio_wait();
3839 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003840 }
3841 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00003842}
3843
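/*
 * Illustrative example (added, not part of the original file): scanning an
 * image with bdrv_get_block_status(), much like the loop in
 * bdrv_make_zero() above.  'pnum' reports how many sectors starting at
 * 'sector_num' share the returned status:
 *
 *     int64_t sector_num = 0;
 *     int pnum;
 *     int64_t status = bdrv_get_block_status(bs, sector_num, 1024, &pnum);
 *
 *     if (status >= 0 && (status & BDRV_BLOCK_ZERO)) {
 *         // the next 'pnum' sectors read as zeroes
 *     }
 *     sector_num += pnum;
 */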
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003844int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3845 int nb_sectors, int *pnum)
3846{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02003847 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3848 if (ret < 0) {
3849 return ret;
3850 }
3851 return
3852 (ret & BDRV_BLOCK_DATA) ||
3853 ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003854}
3855
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003856/*
3857 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3858 *
3859 * Return true if the given sector is allocated in any image between
3860 * BASE and TOP (inclusive). BASE can be NULL to check if the given
3861 * sector is allocated in any image of the chain. Return false otherwise.
3862 *
3863 * 'pnum' is set to the number of sectors (including and immediately following
3864 * the specified sector) that are known to be in the same
3865 * allocated/unallocated state.
3866 *
3867 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02003868int bdrv_is_allocated_above(BlockDriverState *top,
3869 BlockDriverState *base,
3870 int64_t sector_num,
3871 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003872{
3873 BlockDriverState *intermediate;
3874 int ret, n = nb_sectors;
3875
3876 intermediate = top;
3877 while (intermediate && intermediate != base) {
3878 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003879 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3880 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003881 if (ret < 0) {
3882 return ret;
3883 } else if (ret) {
3884 *pnum = pnum_inter;
3885 return 1;
3886 }
3887
3888 /*
3889 * [sector_num, nb_sectors] is unallocated on top but intermediate
3890 * might have
3891 *
3892 * [sector_num+x, nb_sectors] allocated.
3893 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08003894 if (n > pnum_inter &&
3895 (intermediate == top ||
3896 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003897 n = pnum_inter;
3898 }
3899
3900 intermediate = intermediate->backing_hd;
3901 }
3902
3903 *pnum = n;
3904 return 0;
3905}
3906
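/*
 * Illustrative example (added, not part of the original file): checking
 * whether any layer between an overlay and its base holds data for a
 * range, e.g. before copying data between layers of a chain.  'top' and
 * 'base' are assumed to belong to the same backing chain:
 *
 *     int pnum;
 *     int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors,
 *                                       &pnum);
 *     if (ret > 0) {
 *         // the first 'pnum' sectors are allocated above 'base'
 *     }
 */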
aliguori045df332009-03-05 23:00:48 +00003907const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3908{
3909 if (bs->backing_hd && bs->backing_hd->encrypted)
3910 return bs->backing_file;
3911 else if (bs->encrypted)
3912 return bs->filename;
3913 else
3914 return NULL;
3915}
3916
ths5fafdf22007-09-16 21:08:06 +00003917void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00003918 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00003919{
Kevin Wolf3574c602011-10-26 11:02:11 +02003920 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00003921}
3922
ths5fafdf22007-09-16 21:08:06 +00003923int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00003924 const uint8_t *buf, int nb_sectors)
3925{
3926 BlockDriver *drv = bs->drv;
3927 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003928 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003929 if (!drv->bdrv_write_compressed)
3930 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02003931 if (bdrv_check_request(bs, sector_num, nb_sectors))
3932 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003933
Fam Zhenge4654d22013-11-13 18:29:43 +08003934 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003935
bellardfaea38e2006-08-05 21:31:00 +00003936 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3937}
ths3b46e622007-09-17 08:09:54 +00003938
bellardfaea38e2006-08-05 21:31:00 +00003939int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3940{
3941 BlockDriver *drv = bs->drv;
3942 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003943 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003944 if (!drv->bdrv_get_info)
3945 return -ENOTSUP;
3946 memset(bdi, 0, sizeof(*bdi));
3947 return drv->bdrv_get_info(bs, bdi);
3948}
3949
Max Reitzeae041f2013-10-09 10:46:16 +02003950ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3951{
3952 BlockDriver *drv = bs->drv;
3953 if (drv && drv->bdrv_get_specific_info) {
3954 return drv->bdrv_get_specific_info(bs);
3955 }
3956 return NULL;
3957}
3958
Christoph Hellwig45566e92009-07-10 23:11:57 +02003959int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
3960 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00003961{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003962 QEMUIOVector qiov;
3963 struct iovec iov = {
3964 .iov_base = (void *) buf,
3965 .iov_len = size,
3966 };
3967
3968 qemu_iovec_init_external(&qiov, &iov, 1);
3969 return bdrv_writev_vmstate(bs, &qiov, pos);
3970}
3971
3972int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
3973{
aliguori178e08a2009-04-05 19:10:55 +00003974 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003975
3976 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00003977 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003978 } else if (drv->bdrv_save_vmstate) {
3979 return drv->bdrv_save_vmstate(bs, qiov, pos);
3980 } else if (bs->file) {
3981 return bdrv_writev_vmstate(bs->file, qiov, pos);
3982 }
3983
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003984 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00003985}
3986
Christoph Hellwig45566e92009-07-10 23:11:57 +02003987int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
3988 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00003989{
3990 BlockDriver *drv = bs->drv;
3991 if (!drv)
3992 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003993 if (drv->bdrv_load_vmstate)
3994 return drv->bdrv_load_vmstate(bs, buf, pos, size);
3995 if (bs->file)
3996 return bdrv_load_vmstate(bs->file, buf, pos, size);
3997 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00003998}
3999
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004000void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4001{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004002 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004003 return;
4004 }
4005
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004006 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004007}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004008
Kevin Wolf41c695c2012-12-06 14:32:58 +01004009int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4010 const char *tag)
4011{
4012 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4013 bs = bs->file;
4014 }
4015
4016 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4017 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4018 }
4019
4020 return -ENOTSUP;
4021}
4022
Fam Zheng4cc70e92013-11-20 10:01:54 +08004023int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4024{
4025 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4026 bs = bs->file;
4027 }
4028
4029 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4030 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4031 }
4032
4033 return -ENOTSUP;
4034}
4035
Kevin Wolf41c695c2012-12-06 14:32:58 +01004036int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4037{
4038 while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
4039 bs = bs->file;
4040 }
4041
4042 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4043 return bs->drv->bdrv_debug_resume(bs, tag);
4044 }
4045
4046 return -ENOTSUP;
4047}
4048
4049bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4050{
4051 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4052 bs = bs->file;
4053 }
4054
4055 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4056 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4057 }
4058
4059 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004060}
4061
Blue Swirl199630b2010-07-25 20:49:34 +00004062int bdrv_is_snapshot(BlockDriverState *bs)
4063{
4064 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4065}
4066
Jeff Codyb1b1d782012-10-16 15:49:09 -04004067/* backing_file can either be relative, or absolute, or a protocol. If it is
4068 * relative, it must be relative to the chain. So, passing in bs->filename
4069 * from a BDS as backing_file should not be done, as that may be relative to
4070 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004071BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4072 const char *backing_file)
4073{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004074 char *filename_full = NULL;
4075 char *backing_file_full = NULL;
4076 char *filename_tmp = NULL;
4077 int is_protocol = 0;
4078 BlockDriverState *curr_bs = NULL;
4079 BlockDriverState *retval = NULL;
4080
4081 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004082 return NULL;
4083 }
4084
Jeff Codyb1b1d782012-10-16 15:49:09 -04004085 filename_full = g_malloc(PATH_MAX);
4086 backing_file_full = g_malloc(PATH_MAX);
4087 filename_tmp = g_malloc(PATH_MAX);
4088
4089 is_protocol = path_has_protocol(backing_file);
4090
4091 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4092
4093 /* If either of the filename paths is actually a protocol, then
4094 * compare unmodified paths; otherwise make paths relative */
4095 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4096 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4097 retval = curr_bs->backing_hd;
4098 break;
4099 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004100 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004101 /* If not an absolute filename path, make it relative to the current
4102 * image's filename path */
4103 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4104 backing_file);
4105
4106 /* We are going to compare absolute pathnames */
4107 if (!realpath(filename_tmp, filename_full)) {
4108 continue;
4109 }
4110
4111 /* We need to make sure the backing filename we are comparing against
4112 * is relative to the current image filename (or absolute) */
4113 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4114 curr_bs->backing_file);
4115
4116 if (!realpath(filename_tmp, backing_file_full)) {
4117 continue;
4118 }
4119
4120 if (strcmp(backing_file_full, filename_full) == 0) {
4121 retval = curr_bs->backing_hd;
4122 break;
4123 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004124 }
4125 }
4126
Jeff Codyb1b1d782012-10-16 15:49:09 -04004127 g_free(filename_full);
4128 g_free(backing_file_full);
4129 g_free(filename_tmp);
4130 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004131}
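
/* Usage sketch (hypothetical caller): for a chain opened as
 * base.qcow2 <- mid.qcow2 <- active.qcow2, look up the BDS that was opened
 * from "mid.qcow2".  The file names are illustrative only; relative names are
 * resolved against the chain as described above.
 */
static BlockDriverState * __attribute__((unused))
example_lookup_mid_layer(BlockDriverState *active)
{
    BlockDriverState *mid = bdrv_find_backing_image(active, "mid.qcow2");

    /* NULL means no image in the backing chain matches that file name */
    return mid;
}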
4132
Benoît Canetf198fd12012-08-02 10:22:47 +02004133int bdrv_get_backing_file_depth(BlockDriverState *bs)
4134{
4135 if (!bs->drv) {
4136 return 0;
4137 }
4138
4139 if (!bs->backing_hd) {
4140 return 0;
4141 }
4142
4143 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4144}
4145
Jeff Cody79fac562012-09-27 13:29:15 -04004146BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4147{
4148 BlockDriverState *curr_bs = NULL;
4149
4150 if (!bs) {
4151 return NULL;
4152 }
4153
4154 curr_bs = bs;
4155
4156 while (curr_bs->backing_hd) {
4157 curr_bs = curr_bs->backing_hd;
4158 }
4159 return curr_bs;
4160}
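
/* Illustrative note: for a chain opened as base <- snap1 <- active,
 * bdrv_get_backing_file_depth(active) returns 2 and bdrv_find_base(active)
 * returns the "base" BlockDriverState; for a standalone image the depth is 0
 * and bdrv_find_base() returns the image itself.
 */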
4161
bellard83f64092006-08-01 16:21:11 +00004162/**************************************************************/
4163/* async I/Os */
4164
aliguori3b69e4b2009-01-22 16:59:24 +00004165BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004166 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004167 BlockDriverCompletionFunc *cb, void *opaque)
4168{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004169 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4170
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004171 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004172 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004173}
4174
aliguorif141eaf2009-04-07 18:43:24 +00004175BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4176 QEMUIOVector *qiov, int nb_sectors,
4177 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004178{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004179 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4180
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004181 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004182 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004183}
4184
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004185BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4186 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4187 BlockDriverCompletionFunc *cb, void *opaque)
4188{
4189 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4190
4191 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4192 BDRV_REQ_ZERO_WRITE | flags,
4193 cb, opaque, true);
4194}
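
/* Usage sketch (hypothetical caller): start an asynchronous read of
 * qiov->size bytes at sector 0 and get notified through a completion
 * callback.  The qiov is assumed to stay valid, and owned by the caller,
 * until the callback has run.
 */
static void example_read_done(void *opaque, int ret)
{
    if (ret < 0) {
        fprintf(stderr, "aio read failed: %s\n", strerror(-ret));
    }
}

static BlockDriverAIOCB * __attribute__((unused))
example_start_read(BlockDriverState *bs, QEMUIOVector *qiov)
{
    return bdrv_aio_readv(bs, 0, qiov, qiov->size >> BDRV_SECTOR_BITS,
                          example_read_done, NULL);
}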
4195
Kevin Wolf40b4f532009-09-09 17:53:37 +02004196
4197typedef struct MultiwriteCB {
4198 int error;
4199 int num_requests;
4200 int num_callbacks;
4201 struct {
4202 BlockDriverCompletionFunc *cb;
4203 void *opaque;
4204 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004205 } callbacks[];
4206} MultiwriteCB;
4207
4208static void multiwrite_user_cb(MultiwriteCB *mcb)
4209{
4210 int i;
4211
4212 for (i = 0; i < mcb->num_callbacks; i++) {
4213 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004214 if (mcb->callbacks[i].free_qiov) {
4215 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4216 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004217 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004218 }
4219}
4220
4221static void multiwrite_cb(void *opaque, int ret)
4222{
4223 MultiwriteCB *mcb = opaque;
4224
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004225 trace_multiwrite_cb(mcb, ret);
4226
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004227 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004228 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004229 }
4230
4231 mcb->num_requests--;
4232 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004233 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004234 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004235 }
4236}
4237
4238static int multiwrite_req_compare(const void *a, const void *b)
4239{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004240 const BlockRequest *req1 = a, *req2 = b;
4241
4242 /*
4243 * Note that we can't simply subtract req2->sector from req1->sector
4244 * here as that could overflow the return value.
4245 */
4246 if (req1->sector > req2->sector) {
4247 return 1;
4248 } else if (req1->sector < req2->sector) {
4249 return -1;
4250 } else {
4251 return 0;
4252 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004253}
4254
4255/*
4256 * Takes a bunch of requests and tries to merge them. Returns the number of
4257 * requests that remain after merging.
4258 */
4259static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4260 int num_reqs, MultiwriteCB *mcb)
4261{
4262 int i, outidx;
4263
4264 // Sort requests by start sector
4265 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4266
4267 // Check if adjacent requests are exactly sequential or overlapping.
4268 // If so, combine them into a single request.
4269 outidx = 0;
4270 for (i = 1; i < num_reqs; i++) {
4271 int merge = 0;
4272 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4273
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004274 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004275 if (reqs[i].sector <= oldreq_last) {
4276 merge = 1;
4277 }
4278
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004279 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4280 merge = 0;
4281 }
4282
Kevin Wolf40b4f532009-09-09 17:53:37 +02004283 if (merge) {
4284 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004285 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004286 qemu_iovec_init(qiov,
4287 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4288
4289 // Add the first request to the merged one. If the requests are
4290 // overlapping, drop the last sectors of the first request.
4291 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004292 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004293
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004294 // We should not need to add any zeros between the two requests
4295 assert(reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004296
4297 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004298 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004299
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004300 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004301 reqs[outidx].qiov = qiov;
4302
4303 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4304 } else {
4305 outidx++;
4306 reqs[outidx].sector = reqs[i].sector;
4307 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4308 reqs[outidx].qiov = reqs[i].qiov;
4309 }
4310 }
4311
4312 return outidx + 1;
4313}
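
/* Worked example of the merge above: write A covers sectors 0..7 and write B
 * covers sectors 4..11.  After sorting, B starts at or before A's end
 * (4 <= 8), so the two are merged: the first four sectors of A's qiov are
 * kept, A's overlapping tail (sectors 4..7) is dropped in favour of B's data,
 * and B's qiov is appended, yielding one 12-sector request for sectors 0..11.
 */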
4314
4315/*
4316 * Submit multiple AIO write requests at once.
4317 *
4318 * On success, the function returns 0 and all requests in the reqs array have
4319 * been submitted. On error, this function returns -1, and any of the
4320 * requests may or may not be submitted yet. In particular, this means that the
4321 * callback will be called for some of the requests, for others it won't. The
4322 * caller must check the error field of the BlockRequest to wait for the right
4323 * callbacks (if error != 0, no callback will be called).
4324 *
4325 * The implementation may modify the contents of the reqs array, e.g. to merge
4326 * requests. However, the fields opaque and error are left unmodified as they
4327 * are used to signal failure for a single request to the caller.
4328 */
4329int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4330{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004331 MultiwriteCB *mcb;
4332 int i;
4333
Ryan Harper301db7c2011-03-07 10:01:04 -06004334 /* don't submit writes if we don't have a medium */
4335 if (bs->drv == NULL) {
4336 for (i = 0; i < num_reqs; i++) {
4337 reqs[i].error = -ENOMEDIUM;
4338 }
4339 return -1;
4340 }
4341
Kevin Wolf40b4f532009-09-09 17:53:37 +02004342 if (num_reqs == 0) {
4343 return 0;
4344 }
4345
4346 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004347 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004348 mcb->num_requests = 0;
4349 mcb->num_callbacks = num_reqs;
4350
4351 for (i = 0; i < num_reqs; i++) {
4352 mcb->callbacks[i].cb = reqs[i].cb;
4353 mcb->callbacks[i].opaque = reqs[i].opaque;
4354 }
4355
4356 // Check for mergeable requests
4357 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4358
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004359 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4360
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004361 /* Run the aio requests. */
4362 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004363 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004364 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4365 reqs[i].nb_sectors, reqs[i].flags,
4366 multiwrite_cb, mcb,
4367 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004368 }
4369
4370 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004371}
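
/* Usage sketch (hypothetical caller): batch two writes into one submission.
 * Per the contract above, individual request errors are reported through the
 * per-request callback; sector offsets and names are illustrative only.
 */
static void example_multiwrite_cb(void *opaque, int ret)
{
    if (ret < 0) {
        fprintf(stderr, "batched write failed: %s\n", strerror(-ret));
    }
}

static int __attribute__((unused))
example_submit_batch(BlockDriverState *bs, QEMUIOVector *qiov_a,
                     QEMUIOVector *qiov_b)
{
    BlockRequest reqs[2] = {
        { .sector = 0,   .nb_sectors = qiov_a->size >> BDRV_SECTOR_BITS,
          .qiov = qiov_a, .cb = example_multiwrite_cb, .opaque = NULL },
        { .sector = 256, .nb_sectors = qiov_b->size >> BDRV_SECTOR_BITS,
          .qiov = qiov_b, .cb = example_multiwrite_cb, .opaque = NULL },
    };

    /* On -1, only requests whose error field is still 0 get a callback */
    return bdrv_aio_multiwrite(bs, reqs, 2);
}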
4372
bellard83f64092006-08-01 16:21:11 +00004373void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004374{
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004375 acb->aiocb_info->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00004376}
4377
4378/**************************************************************/
4379/* async block device emulation */
4380
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004381typedef struct BlockDriverAIOCBSync {
4382 BlockDriverAIOCB common;
4383 QEMUBH *bh;
4384 int ret;
4385 /* vector translation state */
4386 QEMUIOVector *qiov;
4387 uint8_t *bounce;
4388 int is_write;
4389} BlockDriverAIOCBSync;
4390
4391static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4392{
Kevin Wolfb666d232010-05-05 11:44:39 +02004393 BlockDriverAIOCBSync *acb =
4394 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03004395 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004396 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004397 qemu_aio_release(acb);
4398}
4399
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004400static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004401 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4402 .cancel = bdrv_aio_cancel_em,
4403};
4404
bellard83f64092006-08-01 16:21:11 +00004405static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004406{
pbrookce1a14d2006-08-07 02:38:06 +00004407 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004408
aliguorif141eaf2009-04-07 18:43:24 +00004409 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04004410 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00004411 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004412 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004413 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004414 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00004415 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00004416}
bellardbeac80c2006-06-26 20:08:57 +00004417
aliguorif141eaf2009-04-07 18:43:24 +00004418static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4419 int64_t sector_num,
4420 QEMUIOVector *qiov,
4421 int nb_sectors,
4422 BlockDriverCompletionFunc *cb,
4423 void *opaque,
4424 int is_write)
4425
bellardea2384d2004-08-01 21:59:26 +00004426{
pbrookce1a14d2006-08-07 02:38:06 +00004427 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004428
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004429 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004430 acb->is_write = is_write;
4431 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00004432 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01004433 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004434
4435 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004436 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004437 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004438 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004439 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004440 }
4441
pbrookce1a14d2006-08-07 02:38:06 +00004442 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004443
pbrookce1a14d2006-08-07 02:38:06 +00004444 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004445}
4446
aliguorif141eaf2009-04-07 18:43:24 +00004447static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4448 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004449 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004450{
aliguorif141eaf2009-04-07 18:43:24 +00004451 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004452}
4453
aliguorif141eaf2009-04-07 18:43:24 +00004454static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4455 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4456 BlockDriverCompletionFunc *cb, void *opaque)
4457{
4458 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4459}
4460
Kevin Wolf68485422011-06-30 10:05:46 +02004461
4462typedef struct BlockDriverAIOCBCoroutine {
4463 BlockDriverAIOCB common;
4464 BlockRequest req;
4465 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004466 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004467 QEMUBH* bh;
4468} BlockDriverAIOCBCoroutine;
4469
4470static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4471{
Kevin Wolfd318aea2012-11-13 16:35:08 +01004472 BlockDriverAIOCBCoroutine *acb =
4473 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4474 bool done = false;
4475
4476 acb->done = &done;
4477 while (!done) {
4478 qemu_aio_wait();
4479 }
Kevin Wolf68485422011-06-30 10:05:46 +02004480}
4481
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004482static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004483 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4484 .cancel = bdrv_aio_co_cancel_em,
4485};
4486
Paolo Bonzini35246a62011-10-14 10:41:29 +02004487static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004488{
4489 BlockDriverAIOCBCoroutine *acb = opaque;
4490
4491 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004492
4493 if (acb->done) {
4494 *acb->done = true;
4495 }
4496
Kevin Wolf68485422011-06-30 10:05:46 +02004497 qemu_bh_delete(acb->bh);
4498 qemu_aio_release(acb);
4499}
4500
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004501/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4502static void coroutine_fn bdrv_co_do_rw(void *opaque)
4503{
4504 BlockDriverAIOCBCoroutine *acb = opaque;
4505 BlockDriverState *bs = acb->common.bs;
4506
4507 if (!acb->is_write) {
4508 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004509 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004510 } else {
4511 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004512 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004513 }
4514
Paolo Bonzini35246a62011-10-14 10:41:29 +02004515 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004516 qemu_bh_schedule(acb->bh);
4517}
4518
Kevin Wolf68485422011-06-30 10:05:46 +02004519static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4520 int64_t sector_num,
4521 QEMUIOVector *qiov,
4522 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004523 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004524 BlockDriverCompletionFunc *cb,
4525 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004526 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004527{
4528 Coroutine *co;
4529 BlockDriverAIOCBCoroutine *acb;
4530
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004531 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004532 acb->req.sector = sector_num;
4533 acb->req.nb_sectors = nb_sectors;
4534 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004535 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004536 acb->is_write = is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004537 acb->done = NULL;
Kevin Wolf68485422011-06-30 10:05:46 +02004538
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004539 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004540 qemu_coroutine_enter(co, acb);
4541
4542 return &acb->common;
4543}
4544
Paolo Bonzini07f07612011-10-17 12:32:12 +02004545static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004546{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004547 BlockDriverAIOCBCoroutine *acb = opaque;
4548 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004549
Paolo Bonzini07f07612011-10-17 12:32:12 +02004550 acb->req.error = bdrv_co_flush(bs);
4551 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004552 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004553}
4554
Paolo Bonzini07f07612011-10-17 12:32:12 +02004555BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004556 BlockDriverCompletionFunc *cb, void *opaque)
4557{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004558 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004559
Paolo Bonzini07f07612011-10-17 12:32:12 +02004560 Coroutine *co;
4561 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004562
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004563 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004564 acb->done = NULL;
4565
Paolo Bonzini07f07612011-10-17 12:32:12 +02004566 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4567 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004568
Alexander Graf016f5cf2010-05-26 17:51:49 +02004569 return &acb->common;
4570}
4571
Paolo Bonzini4265d622011-10-17 12:32:14 +02004572static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4573{
4574 BlockDriverAIOCBCoroutine *acb = opaque;
4575 BlockDriverState *bs = acb->common.bs;
4576
4577 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4578 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4579 qemu_bh_schedule(acb->bh);
4580}
4581
4582BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4583 int64_t sector_num, int nb_sectors,
4584 BlockDriverCompletionFunc *cb, void *opaque)
4585{
4586 Coroutine *co;
4587 BlockDriverAIOCBCoroutine *acb;
4588
4589 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4590
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004591 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004592 acb->req.sector = sector_num;
4593 acb->req.nb_sectors = nb_sectors;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004594 acb->done = NULL;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004595 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4596 qemu_coroutine_enter(co, acb);
4597
4598 return &acb->common;
4599}
4600
bellardea2384d2004-08-01 21:59:26 +00004601void bdrv_init(void)
4602{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004603 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004604}
pbrookce1a14d2006-08-07 02:38:06 +00004605
Markus Armbrustereb852012009-10-27 18:41:44 +01004606void bdrv_init_with_whitelist(void)
4607{
4608 use_bdrv_whitelist = 1;
4609 bdrv_init();
4610}
4611
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004612void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004613 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004614{
pbrookce1a14d2006-08-07 02:38:06 +00004615 BlockDriverAIOCB *acb;
4616
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004617 acb = g_slice_alloc(aiocb_info->aiocb_size);
4618 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004619 acb->bs = bs;
4620 acb->cb = cb;
4621 acb->opaque = opaque;
4622 return acb;
4623}
4624
4625void qemu_aio_release(void *p)
4626{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004627 BlockDriverAIOCB *acb = p;
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004628 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
pbrookce1a14d2006-08-07 02:38:06 +00004629}
bellard19cb3732006-08-19 11:45:59 +00004630
4631/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004632/* Coroutine block device emulation */
4633
4634typedef struct CoroutineIOCompletion {
4635 Coroutine *coroutine;
4636 int ret;
4637} CoroutineIOCompletion;
4638
4639static void bdrv_co_io_em_complete(void *opaque, int ret)
4640{
4641 CoroutineIOCompletion *co = opaque;
4642
4643 co->ret = ret;
4644 qemu_coroutine_enter(co->coroutine, NULL);
4645}
4646
4647static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4648 int nb_sectors, QEMUIOVector *iov,
4649 bool is_write)
4650{
4651 CoroutineIOCompletion co = {
4652 .coroutine = qemu_coroutine_self(),
4653 };
4654 BlockDriverAIOCB *acb;
4655
4656 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004657 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4658 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004659 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004660 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4661 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004662 }
4663
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004664 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004665 if (!acb) {
4666 return -EIO;
4667 }
4668 qemu_coroutine_yield();
4669
4670 return co.ret;
4671}
4672
4673static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4674 int64_t sector_num, int nb_sectors,
4675 QEMUIOVector *iov)
4676{
4677 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4678}
4679
4680static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4681 int64_t sector_num, int nb_sectors,
4682 QEMUIOVector *iov)
4683{
4684 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4685}
4686
Paolo Bonzini07f07612011-10-17 12:32:12 +02004687static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004688{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004689 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004690
Paolo Bonzini07f07612011-10-17 12:32:12 +02004691 rwco->ret = bdrv_co_flush(rwco->bs);
4692}
4693
4694int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4695{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004696 int ret;
4697
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004698 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004699 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004700 }
4701
Kevin Wolfca716362011-11-10 18:13:59 +01004702 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004703 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004704 if (bs->drv->bdrv_co_flush_to_os) {
4705 ret = bs->drv->bdrv_co_flush_to_os(bs);
4706 if (ret < 0) {
4707 return ret;
4708 }
4709 }
4710
Kevin Wolfca716362011-11-10 18:13:59 +01004711 /* But don't actually force it to the disk with cache=unsafe */
4712 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004713 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004714 }
4715
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004716 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004717 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004718 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004719 } else if (bs->drv->bdrv_aio_flush) {
4720 BlockDriverAIOCB *acb;
4721 CoroutineIOCompletion co = {
4722 .coroutine = qemu_coroutine_self(),
4723 };
4724
4725 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4726 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004727 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004728 } else {
4729 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004730 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004731 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02004732 } else {
4733 /*
4734 * Some block drivers always operate in either writethrough or unsafe
4735 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
4736 * know how the server works (because the behaviour is hardcoded or
4737 * depends on server-side configuration), so we can't ensure that
4738 * everything is safe on disk. Returning an error doesn't work because
4739 * that would break guests even if the server operates in writethrough
4740 * mode.
4741 *
4742 * Let's hope the user knows what he's doing.
4743 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004744 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004745 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004746 if (ret < 0) {
4747 return ret;
4748 }
4749
4750 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4751 * in the case of cache=unsafe, so there are no useless flushes.
4752 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02004753flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004754 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004755}
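
/* Summary of the ordering implemented above: flush cached data to the OS
 * (bdrv_co_flush_to_os), skip the disk flush for cache=unsafe
 * (BDRV_O_NO_FLUSH), otherwise flush to the disk via bdrv_co_flush_to_disk or
 * bdrv_aio_flush (treating a driver with neither as success), and in either
 * case recurse into bs->file so the protocol layer is flushed as well.
 */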
4756
Anthony Liguori0f154232011-11-14 15:09:45 -06004757void bdrv_invalidate_cache(BlockDriverState *bs)
4758{
4759 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
4760 bs->drv->bdrv_invalidate_cache(bs);
4761 }
4762}
4763
4764void bdrv_invalidate_cache_all(void)
4765{
4766 BlockDriverState *bs;
4767
Benoît Canetdc364f42014-01-23 21:31:32 +01004768 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Anthony Liguori0f154232011-11-14 15:09:45 -06004769 bdrv_invalidate_cache(bs);
4770 }
4771}
4772
Benoît Canet07789262012-03-23 08:36:49 +01004773void bdrv_clear_incoming_migration_all(void)
4774{
4775 BlockDriverState *bs;
4776
Benoît Canetdc364f42014-01-23 21:31:32 +01004777 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Benoît Canet07789262012-03-23 08:36:49 +01004778 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4779 }
4780}
4781
Paolo Bonzini07f07612011-10-17 12:32:12 +02004782int bdrv_flush(BlockDriverState *bs)
4783{
4784 Coroutine *co;
4785 RwCo rwco = {
4786 .bs = bs,
4787 .ret = NOT_DONE,
4788 };
4789
4790 if (qemu_in_coroutine()) {
4791 /* Fast-path if already in coroutine context */
4792 bdrv_flush_co_entry(&rwco);
4793 } else {
4794 co = qemu_coroutine_create(bdrv_flush_co_entry);
4795 qemu_coroutine_enter(co, &rwco);
4796 while (rwco.ret == NOT_DONE) {
4797 qemu_aio_wait();
4798 }
4799 }
4800
4801 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004802}
4803
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004804typedef struct DiscardCo {
4805 BlockDriverState *bs;
4806 int64_t sector_num;
4807 int nb_sectors;
4808 int ret;
4809} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004810static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4811{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004812 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004813
4814 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4815}
4816
Peter Lieven6f14da52013-10-24 12:06:59 +02004817/* If no limit is specified in the BlockLimits, use a default
4818 * of 32768 512-byte sectors (16 MiB) per request.
4819 */
4820#define MAX_DISCARD_DEFAULT 32768
4821
Paolo Bonzini4265d622011-10-17 12:32:14 +02004822int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4823 int nb_sectors)
4824{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004825 int max_discard;
4826
Paolo Bonzini4265d622011-10-17 12:32:14 +02004827 if (!bs->drv) {
4828 return -ENOMEDIUM;
4829 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4830 return -EIO;
4831 } else if (bs->read_only) {
4832 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004833 }
4834
Fam Zhenge4654d22013-11-13 18:29:43 +08004835 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004836
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01004837 /* Do nothing if disabled. */
4838 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4839 return 0;
4840 }
4841
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004842 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004843 return 0;
4844 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004845
4846 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4847 while (nb_sectors > 0) {
4848 int ret;
4849 int num = nb_sectors;
4850
4851 /* align request */
4852 if (bs->bl.discard_alignment &&
4853 num >= bs->bl.discard_alignment &&
4854 sector_num % bs->bl.discard_alignment) {
4855 if (num > bs->bl.discard_alignment) {
4856 num = bs->bl.discard_alignment;
4857 }
4858 num -= sector_num % bs->bl.discard_alignment;
4859 }
4860
4861 /* limit request size */
4862 if (num > max_discard) {
4863 num = max_discard;
4864 }
4865
4866 if (bs->drv->bdrv_co_discard) {
4867 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4868 } else {
4869 BlockDriverAIOCB *acb;
4870 CoroutineIOCompletion co = {
4871 .coroutine = qemu_coroutine_self(),
4872 };
4873
4874 acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
4875 bdrv_co_io_em_complete, &co);
4876 if (acb == NULL) {
4877 return -EIO;
4878 } else {
4879 qemu_coroutine_yield();
4880 ret = co.ret;
4881 }
4882 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01004883 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004884 return ret;
4885 }
4886
4887 sector_num += num;
4888 nb_sectors -= num;
4889 }
4890 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004891}
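
/* Worked example of the splitting above: with bl.discard_alignment == 8 and a
 * request for sectors 5..24 (sector_num 5, nb_sectors 20), the first
 * iteration discards 3 sectors to reach the alignment boundary at sector 8,
 * and the remaining 17 sectors are then issued in chunks no larger than
 * max_discard.
 */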
4892
4893int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4894{
4895 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004896 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004897 .bs = bs,
4898 .sector_num = sector_num,
4899 .nb_sectors = nb_sectors,
4900 .ret = NOT_DONE,
4901 };
4902
4903 if (qemu_in_coroutine()) {
4904 /* Fast-path if already in coroutine context */
4905 bdrv_discard_co_entry(&rwco);
4906 } else {
4907 co = qemu_coroutine_create(bdrv_discard_co_entry);
4908 qemu_coroutine_enter(co, &rwco);
4909 while (rwco.ret == NOT_DONE) {
4910 qemu_aio_wait();
4911 }
4912 }
4913
4914 return rwco.ret;
4915}
4916
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004917/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00004918/* removable device support */
4919
4920/**
4921 * Return TRUE if the media is present
4922 */
4923int bdrv_is_inserted(BlockDriverState *bs)
4924{
4925 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004926
bellard19cb3732006-08-19 11:45:59 +00004927 if (!drv)
4928 return 0;
4929 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004930 return 1;
4931 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004932}
4933
4934/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004935 * Return whether the media changed since the last call to this
4936 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004937 */
4938int bdrv_media_changed(BlockDriverState *bs)
4939{
4940 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004941
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004942 if (drv && drv->bdrv_media_changed) {
4943 return drv->bdrv_media_changed(bs);
4944 }
4945 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00004946}
4947
4948/**
4949 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4950 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02004951void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00004952{
4953 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004954
Markus Armbruster822e1cd2011-07-20 18:23:42 +02004955 if (drv && drv->bdrv_eject) {
4956 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00004957 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02004958
4959 if (bs->device_name[0] != '\0') {
4960 bdrv_emit_qmp_eject_event(bs, eject_flag);
4961 }
bellard19cb3732006-08-19 11:45:59 +00004962}
4963
bellard19cb3732006-08-19 11:45:59 +00004964/**
4965 * Lock or unlock the media (if it is locked, the user won't be able
4966 * to eject it manually).
4967 */
Markus Armbruster025e8492011-09-06 18:58:47 +02004968void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00004969{
4970 BlockDriver *drv = bs->drv;
4971
Markus Armbruster025e8492011-09-06 18:58:47 +02004972 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01004973
Markus Armbruster025e8492011-09-06 18:58:47 +02004974 if (drv && drv->bdrv_lock_medium) {
4975 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00004976 }
4977}
ths985a03b2007-12-24 16:10:43 +00004978
4979/* needed for generic scsi interface */
4980
4981int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
4982{
4983 BlockDriver *drv = bs->drv;
4984
4985 if (drv && drv->bdrv_ioctl)
4986 return drv->bdrv_ioctl(bs, req, buf);
4987 return -ENOTSUP;
4988}
aliguori7d780662009-03-12 19:57:08 +00004989
aliguori221f7152009-03-28 17:28:41 +00004990BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
4991 unsigned long int req, void *buf,
4992 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00004993{
aliguori221f7152009-03-28 17:28:41 +00004994 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00004995
aliguori221f7152009-03-28 17:28:41 +00004996 if (drv && drv->bdrv_aio_ioctl)
4997 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
4998 return NULL;
aliguori7d780662009-03-12 19:57:08 +00004999}
aliguorie268ca52009-04-22 20:20:00 +00005000
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005001void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005002{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005003 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005004}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005005
aliguorie268ca52009-04-22 20:20:00 +00005006void *qemu_blockalign(BlockDriverState *bs, size_t size)
5007{
Kevin Wolf339064d2013-11-28 10:23:32 +01005008 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005009}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005010
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005011/*
5012 * Check if all memory in this vector is sector aligned.
5013 */
5014bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5015{
5016 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005017 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005018
5019 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005020 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005021 return false;
5022 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005023 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005024 return false;
5025 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005026 }
5027
5028 return true;
5029}
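
/* Sketch (hypothetical helper): build a single-element qiov that satisfies
 * bdrv_qiov_is_aligned(), assuming len is itself a multiple of the optimal
 * memory alignment (e.g. a whole number of sectors).
 */
static bool __attribute__((unused))
example_build_aligned_qiov(BlockDriverState *bs, QEMUIOVector *qiov,
                           struct iovec *iov, size_t len)
{
    iov->iov_base = qemu_blockalign(bs, len);   /* aligned allocation */
    iov->iov_len = len;
    qemu_iovec_init_external(qiov, iov, 1);
    return bdrv_qiov_is_aligned(bs, qiov);
}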
5030
Fam Zhenge4654d22013-11-13 18:29:43 +08005031BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005032{
5033 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005034 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005035
Paolo Bonzini50717e92013-01-21 17:09:45 +01005036 assert((granularity & (granularity - 1)) == 0);
5037
Fam Zhenge4654d22013-11-13 18:29:43 +08005038 granularity >>= BDRV_SECTOR_BITS;
5039 assert(granularity);
5040 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
5041 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5042 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5043 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5044 return bitmap;
5045}
5046
5047void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5048{
5049 BdrvDirtyBitmap *bm, *next;
5050 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5051 if (bm == bitmap) {
5052 QLIST_REMOVE(bitmap, list);
5053 hbitmap_free(bitmap->bitmap);
5054 g_free(bitmap);
5055 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005056 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005057 }
5058}
5059
Fam Zheng21b56832013-11-13 18:29:44 +08005060BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5061{
5062 BdrvDirtyBitmap *bm;
5063 BlockDirtyInfoList *list = NULL;
5064 BlockDirtyInfoList **plist = &list;
5065
5066 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5067 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5068 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5069 info->count = bdrv_get_dirty_count(bs, bm);
5070 info->granularity =
5071 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5072 entry->value = info;
5073 *plist = entry;
5074 plist = &entry->next;
5075 }
5076
5077 return list;
5078}
5079
Fam Zhenge4654d22013-11-13 18:29:43 +08005080int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005081{
Fam Zhenge4654d22013-11-13 18:29:43 +08005082 if (bitmap) {
5083 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005084 } else {
5085 return 0;
5086 }
5087}
5088
Fam Zhenge4654d22013-11-13 18:29:43 +08005089void bdrv_dirty_iter_init(BlockDriverState *bs,
5090 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005091{
Fam Zhenge4654d22013-11-13 18:29:43 +08005092 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005093}
5094
5095void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5096 int nr_sectors)
5097{
Fam Zhenge4654d22013-11-13 18:29:43 +08005098 BdrvDirtyBitmap *bitmap;
5099 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5100 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005101 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005102}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005103
Fam Zhenge4654d22013-11-13 18:29:43 +08005104void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5105{
5106 BdrvDirtyBitmap *bitmap;
5107 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5108 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5109 }
5110}
5111
5112int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5113{
5114 return hbitmap_count(bitmap->bitmap);
5115}
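
/* Sketch: a typical dirty-bitmap round trip on an open BDS, assuming a 64 KiB
 * granularity.  Guest writes normally mark sectors dirty via bdrv_set_dirty();
 * the direct call below merely stands in for that.
 */
static void __attribute__((unused))
example_dirty_bitmap_roundtrip(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bitmap = bdrv_create_dirty_bitmap(bs, 65536);

    bdrv_set_dirty(bs, 0, 16);                  /* mark sectors 0..15 dirty */
    assert(bdrv_get_dirty(bs, bitmap, 0));
    assert(bdrv_get_dirty_count(bs, bitmap) > 0);

    bdrv_release_dirty_bitmap(bs, bitmap);
}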
5116
Fam Zheng9fcb0252013-08-23 09:14:46 +08005117/* Get a reference to bs */
5118void bdrv_ref(BlockDriverState *bs)
5119{
5120 bs->refcnt++;
5121}
5122
5123/* Release a previously grabbed reference to bs.
5124 * If after releasing, reference count is zero, the BlockDriverState is
5125 * deleted. */
5126void bdrv_unref(BlockDriverState *bs)
5127{
5128 assert(bs->refcnt > 0);
5129 if (--bs->refcnt == 0) {
5130 bdrv_delete(bs);
5131 }
5132}
5133
Marcelo Tosattidb593f22011-01-26 12:12:34 -02005134void bdrv_set_in_use(BlockDriverState *bs, int in_use)
5135{
5136 assert(bs->in_use != in_use);
5137 bs->in_use = in_use;
5138}
5139
5140int bdrv_in_use(BlockDriverState *bs)
5141{
5142 return bs->in_use;
5143}
5144
Luiz Capitulino28a72822011-09-26 17:43:50 -03005145void bdrv_iostatus_enable(BlockDriverState *bs)
5146{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005147 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005148 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005149}
5150
5151/* The I/O status is only enabled if the drive explicitly
5152 * enables it _and_ the VM is configured to stop on errors */
5153bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5154{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005155 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005156 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5157 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5158 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005159}
5160
5161void bdrv_iostatus_disable(BlockDriverState *bs)
5162{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005163 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005164}
5165
5166void bdrv_iostatus_reset(BlockDriverState *bs)
5167{
5168 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005169 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005170 if (bs->job) {
5171 block_job_iostatus_reset(bs->job);
5172 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005173 }
5174}
5175
Luiz Capitulino28a72822011-09-26 17:43:50 -03005176void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5177{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005178 assert(bdrv_iostatus_is_enabled(bs));
5179 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005180 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5181 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005182 }
5183}
5184
Christoph Hellwiga597e792011-08-25 08:26:01 +02005185void
5186bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5187 enum BlockAcctType type)
5188{
5189 assert(type < BDRV_MAX_IOTYPE);
5190
5191 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005192 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02005193 cookie->type = type;
5194}
5195
5196void
5197bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5198{
5199 assert(cookie->type < BDRV_MAX_IOTYPE);
5200
5201 bs->nr_bytes[cookie->type] += cookie->bytes;
5202 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005203 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02005204}
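
/* Sketch: how device emulation typically brackets an I/O operation with
 * accounting; buf is assumed to hold at least one sector.
 */
static int __attribute__((unused))
example_accounted_read(BlockDriverState *bs, uint8_t *buf)
{
    BlockAcctCookie cookie;
    int ret;

    bdrv_acct_start(bs, &cookie, BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
    ret = bdrv_read(bs, 0, buf, 1);
    bdrv_acct_done(bs, &cookie);
    return ret;
}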
5205
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005206void bdrv_img_create(const char *filename, const char *fmt,
5207 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005208 char *options, uint64_t img_size, int flags,
5209 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005210{
5211 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02005212 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005213 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005214 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005215 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005216 int ret = 0;
5217
5218 /* Find driver and parse its options */
5219 drv = bdrv_find_format(fmt);
5220 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005221 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005222 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005223 }
5224
Kevin Wolf98289622013-07-10 15:47:39 +02005225 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005226 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005227 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005228 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005229 }
5230
5231 create_options = append_option_parameters(create_options,
5232 drv->create_options);
5233 create_options = append_option_parameters(create_options,
5234 proto_drv->create_options);
5235
5236 /* Create parameter list with default values */
5237 param = parse_option_parameters("", create_options, param);
5238
5239 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
5240
5241 /* Parse -o options */
5242 if (options) {
5243 param = parse_option_parameters(options, create_options, param);
5244 if (param == NULL) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005245 error_setg(errp, "Invalid options for file format '%s'.", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005246 goto out;
5247 }
5248 }
5249
5250 if (base_filename) {
5251 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
5252 base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005253 error_setg(errp, "Backing file not supported for file format '%s'",
5254 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005255 goto out;
5256 }
5257 }
5258
5259 if (base_fmt) {
5260 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005261 error_setg(errp, "Backing file format not supported for file "
5262 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005263 goto out;
5264 }
5265 }
5266
Jes Sorensen792da932010-12-16 13:52:17 +01005267 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
5268 if (backing_file && backing_file->value.s) {
5269 if (!strcmp(filename, backing_file->value.s)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005270 error_setg(errp, "Error: Trying to create an image with the "
5271 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005272 goto out;
5273 }
5274 }
5275
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005276 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
5277 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005278 backing_drv = bdrv_find_format(backing_fmt->value.s);
5279 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005280 error_setg(errp, "Unknown backing file format '%s'",
5281 backing_fmt->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005282 goto out;
5283 }
5284 }
5285
5286 // The size for the image must always be specified, with one exception:
5287 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02005288 size = get_option_parameter(param, BLOCK_OPT_SIZE);
5289 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005290 if (backing_file && backing_file->value.s) {
Max Reitz66f6b812013-12-03 14:57:52 +01005291 BlockDriverState *bs;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005292 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005293 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02005294 int back_flags;
5295
5296 /* backing files always opened read-only */
5297 back_flags =
5298 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005299
Max Reitzf67503e2014-02-18 18:33:05 +01005300 bs = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01005301 ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005302 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005303 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005304 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5305 backing_file->value.s,
5306 error_get_pretty(local_err));
5307 error_free(local_err);
5308 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005309 goto out;
5310 }
5311 bdrv_get_geometry(bs, &size);
5312 size *= 512;
5313
5314 snprintf(buf, sizeof(buf), "%" PRId64, size);
5315 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
Max Reitz66f6b812013-12-03 14:57:52 +01005316
5317 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005318 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005319 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005320 goto out;
5321 }
5322 }
5323
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005324 if (!quiet) {
5325 printf("Formatting '%s', fmt=%s ", filename, fmt);
5326 print_option_parameters(param);
5327 puts("");
5328 }
Max Reitzcc84d902013-09-06 17:14:26 +02005329 ret = bdrv_create(drv, filename, param, &local_err);
5330 if (ret == -EFBIG) {
5331 /* This is generally a better message than whatever the driver would
5332 * deliver (especially because of the cluster_size_hint), since that
5333 * is most probably not much different from "image too large". */
5334 const char *cluster_size_hint = "";
5335 if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5336 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005337 }
Max Reitzcc84d902013-09-06 17:14:26 +02005338 error_setg(errp, "The image size is too large for file format '%s'"
5339 "%s", fmt, cluster_size_hint);
5340 error_free(local_err);
5341 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005342 }
5343
5344out:
5345 free_option_parameters(create_options);
5346 free_option_parameters(param);
5347
Markus Armbruster84d18f02014-01-30 15:07:28 +01005348 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005349 error_propagate(errp, local_err);
5350 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005351}
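
/* Usage sketch: create a 1 GiB image with default options; the file name and
 * format are illustrative.  Errors come back through the Error object, just
 * as in the function above.
 */
static void __attribute__((unused))
example_create_image(void)
{
    Error *err = NULL;

    bdrv_img_create("example.qcow2", "qcow2", NULL, NULL, NULL,
                    (uint64_t)1 << 30, 0, &err, true);
    if (err) {
        fprintf(stderr, "%s\n", error_get_pretty(err));
        error_free(err);
    }
}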
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005352
5353AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5354{
5355 /* Currently BlockDriverState always uses the main loop AioContext */
5356 return qemu_get_aio_context();
5357}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005358
5359void bdrv_add_before_write_notifier(BlockDriverState *bs,
5360 NotifierWithReturn *notifier)
5361{
5362 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5363}
Max Reitz6f176b42013-09-03 10:09:50 +02005364
5365int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
5366{
5367 if (bs->drv->bdrv_amend_options == NULL) {
5368 return -ENOTSUP;
5369 }
5370 return bs->drv->bdrv_amend_options(bs, options);
5371}
Benoît Canetf6186f42013-10-02 14:33:48 +02005372
Benoît Canet212a5a82014-01-23 21:31:36 +01005373/* Used to recurse on single child block filters.
5374 * Single-child block filters store their child in bs->file.
5375 */
5376bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
5377 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005378{
Benoît Canet212a5a82014-01-23 21:31:36 +01005379 if (!bs->drv) {
5380 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005381 }
5382
Benoît Canet212a5a82014-01-23 21:31:36 +01005383 if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
5384 if (bs == candidate) {
5385 return true;
5386 } else {
5387 return false;
5388 }
Benoît Canetf6186f42013-10-02 14:33:48 +02005389 }
5390
Benoît Canet212a5a82014-01-23 21:31:36 +01005391 if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {
5392 return false;
5393 }
5394
5395 if (!bs->file) {
5396 return false;
5397 }
5398
5399 return bdrv_recurse_is_first_non_filter(bs->file, candidate);
Benoît Canetf6186f42013-10-02 14:33:48 +02005400}
5401
Benoît Canet212a5a82014-01-23 21:31:36 +01005402bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5403 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005404{
Benoît Canet212a5a82014-01-23 21:31:36 +01005405 if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
5406 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5407 }
5408
5409 return bdrv_generic_is_first_non_filter(bs, candidate);
5410}
5411
5412/* This function checks if the candidate is the first non-filter bs down its
5413 * bs chain. Since we don't have pointers to parents it explores all bs chains
5414 * from the top. Some filters can choose not to pass down the recursion.
5415 */
5416bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5417{
5418 BlockDriverState *bs;
5419
5420 /* walk down the bs forest recursively */
5421 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5422 bool perm;
5423
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005424 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005425
5426 /* candidate is the first non filter */
5427 if (perm) {
5428 return true;
5429 }
5430 }
5431
5432 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005433}