blob: 17c34dc240d96bf4197badcab113bb81823d86b8 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03005 * Copyright (c) 2020 Virtuozzo International GmbH.
ths5fafdf22007-09-16 21:08:06 +00006 *
bellardfc01f7e2003-06-30 10:03:06 +00007 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
Markus Armbrustere688df62018-02-01 12:18:31 +010025
Peter Maydelld38ea872016-01-29 17:50:05 +000026#include "qemu/osdep.h"
Daniel P. Berrange0ab8ed12017-01-25 16:14:15 +000027#include "block/trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010028#include "block/block_int.h"
29#include "block/blockjob.h"
Markus Armbrustere2c1c342022-12-21 14:35:49 +010030#include "block/dirty-bitmap.h"
Max Reitz0c9b70d2020-10-27 20:05:42 +010031#include "block/fuse.h"
Kevin Wolfcd7fca92016-07-06 11:22:39 +020032#include "block/nbd.h"
Max Reitz609f45e2018-06-14 21:14:28 +020033#include "block/qdict.h"
Markus Armbrusterd49b6832015-03-17 18:29:20 +010034#include "qemu/error-report.h"
Marc-André Lureau5e5733e2019-08-29 22:34:43 +040035#include "block/module_block.h"
Markus Armbrusterdb725812019-08-12 07:23:50 +020036#include "qemu/main-loop.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/module.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010038#include "qapi/error.h"
Daniel P. Berrangé407bc4b2024-11-18 16:12:34 +010039#include "qobject/qdict.h"
40#include "qobject/qjson.h"
41#include "qobject/qnull.h"
42#include "qobject/qstring.h"
Kevin Wolfe1d74bc2018-01-10 15:52:33 +010043#include "qapi/qobject-output-visitor.h"
44#include "qapi/qapi-visit-block-core.h"
Philippe Mathieu-Daudé32cad1f2024-12-03 15:20:13 +010045#include "system/block-backend.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010046#include "qemu/notify.h"
Markus Armbruster922a01a2018-02-01 12:18:46 +010047#include "qemu/option.h"
Daniel P. Berrange10817bf2015-09-01 14:48:02 +010048#include "qemu/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010049#include "block/qapi.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010050#include "qemu/timer.h"
Veronia Bahaaf348b6d2016-03-20 19:16:19 +020051#include "qemu/cutils.h"
52#include "qemu/id.h"
Hanna Reitz0bc329f2021-08-12 10:41:44 +020053#include "qemu/range.h"
54#include "qemu/rcu.h"
Vladimir Sementsov-Ogievskiy21c22832020-09-24 21:54:10 +030055#include "block/coroutines.h"
bellardfc01f7e2003-06-30 10:03:06 +000056
Juan Quintela71e72a12009-07-27 16:12:56 +020057#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000058#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000059#include <sys/queue.h>
Joelle van Dynefeccdce2021-03-15 11:03:39 -070060#if defined(HAVE_SYS_DISK_H)
bellard7674e7b2005-04-26 21:59:26 +000061#include <sys/disk.h>
62#endif
blueswir1c5e97232009-03-07 20:06:23 +000063#endif
bellard7674e7b2005-04-26 21:59:26 +000064
aliguori49dc7682009-03-08 16:26:59 +000065#ifdef _WIN32
66#include <windows.h>
67#endif
68
/* Placeholder value used while an emulated sync operation is in progress */
#define NOT_DONE 0x7fffffff
70
Emanuele Giuseppe Esposito3b491a92022-03-03 10:15:48 -050071/* Protected by BQL */
Benoît Canetdc364f42014-01-23 21:31:32 +010072static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
73 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
74
Emanuele Giuseppe Esposito3b491a92022-03-03 10:15:48 -050075/* Protected by BQL */
Max Reitz2c1d04e2016-01-29 16:36:11 +010076static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
78
Emanuele Giuseppe Esposito3b491a92022-03-03 10:15:48 -050079/* Protected by BQL */
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010080static QLIST_HEAD(, BlockDriver) bdrv_drivers =
81 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000082
Max Reitz5b363932016-05-17 16:41:31 +020083static BlockDriverState *bdrv_open_inherit(const char *filename,
84 const char *reference,
85 QDict *options, int flags,
86 BlockDriverState *parent,
Max Reitzbd86fb92020-05-13 13:05:13 +020087 const BdrvChildClass *child_class,
Max Reitz272c02e2020-05-13 13:05:17 +020088 BdrvChildRole child_role,
Kevin Wolf7ead9462024-04-25 14:56:02 +020089 bool parse_filename,
Max Reitz5b363932016-05-17 16:41:31 +020090 Error **errp);
Kevin Wolff3930ed2015-04-08 13:43:47 +020091
Kevin Wolfbfb8aa62021-10-18 15:47:14 +020092static bool bdrv_recurse_has_child(BlockDriverState *bs,
93 BlockDriverState *child);
94
Kevin Wolfad29eb32023-09-11 11:46:07 +020095static void GRAPH_WRLOCK
96bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs);
97
Kevin Wolf2f64e1f2023-09-11 11:46:08 +020098static void GRAPH_WRLOCK
99bdrv_remove_child(BdrvChild *child, Transaction *tran);
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +0300100
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +0300101static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
102 BlockReopenQueue *queue,
Alberto Garciaecd30d22021-06-10 15:05:36 +0300103 Transaction *change_child_tran, Error **errp);
Vladimir Sementsov-Ogievskiy53e96d12021-04-28 18:17:35 +0300104static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
105static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
106
Emanuele Giuseppe Espositofa8fc1d2021-12-15 07:11:38 -0500107static bool bdrv_backing_overridden(BlockDriverState *bs);
108
Fiona Ebner91ba0e12025-05-30 17:10:45 +0200109static bool GRAPH_RDLOCK
110bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
111 GHashTable *visited, Transaction *tran, Error **errp);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -0400112
/*
 * Non-zero means only whitelisted block drivers may be used.
 * Reported to callers through bdrv_uses_whitelist().
 */
static int use_bdrv_whitelist;
115
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000116#ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (e.g. "c:" or "D:\foo"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Bail out before touching filename[1]: the string may be empty */
    if (!is_lower && !is_upper) {
        return 0;
    }

    return filename[1] == ':';
}
123
/*
 * Return 1 if @filename names a Windows drive: either exactly a drive
 * prefix such as "d:", or a string starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* A bare "<letter>:" with nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
134#endif
135
Kevin Wolf339064d2013-11-28 10:23:32 +0100136size_t bdrv_opt_mem_align(BlockDriverState *bs)
137{
138 if (!bs || !bs->drv) {
Denis V. Lunev459b4e62015-05-12 17:30:56 +0300139 /* page size or 4k (hdd sector size) should be on the safe side */
Marc-André Lureau8e3b0cb2022-03-23 19:57:22 +0400140 return MAX(4096, qemu_real_host_page_size());
Kevin Wolf339064d2013-11-28 10:23:32 +0100141 }
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500142 IO_CODE();
Kevin Wolf339064d2013-11-28 10:23:32 +0100143
144 return bs->bl.opt_mem_alignment;
145}
146
Denis V. Lunev4196d2f2015-05-12 17:30:55 +0300147size_t bdrv_min_mem_align(BlockDriverState *bs)
148{
149 if (!bs || !bs->drv) {
Denis V. Lunev459b4e62015-05-12 17:30:56 +0300150 /* page size or 4k (hdd sector size) should be on the safe side */
Marc-André Lureau8e3b0cb2022-03-23 19:57:22 +0400151 return MAX(4096, qemu_real_host_page_size());
Denis V. Lunev4196d2f2015-05-12 17:30:55 +0300152 }
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500153 IO_CODE();
Denis V. Lunev4196d2f2015-05-12 17:30:55 +0300154
155 return bs->bl.min_mem_alignment;
156}
157
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before the first directory separator.
 *
 * On Windows, drive names and drive prefixes are never treated as
 * protocols, and '\' counts as a separator as well.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* Look at the first separator character (or the terminating NUL) */
    return path[strcspn(path, separators)] == ':';
}
175
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows, a leading '\',
 * a drive prefix, or a drive name (e.g. "\\.\d:") also count.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
188
/*
 * Build a path for @filename, interpreting it relative to @base_path.
 *
 * If @filename is absolute, a duplicate of it is returned.  Otherwise the
 * leading part of @base_path -- everything up to and including its last
 * directory separator or, if there is none, its "<protocol>:" prefix -- is
 * kept and @filename is appended to it.  URLs are supported.
 *
 * Returns a newly allocated string (allocated with g_strdup()/g_malloc()).
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *protocol_stripped = NULL;
    const char *p, *p1;
    char *result;
    /*
     * Length of the part of base_path that is kept.  It is a pointer
     * difference that can never be negative (p always points at or after
     * base_path), so keep it in a size_t rather than truncating to int.
     */
    size_t len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* Skip past "<protocol>:" so the prefix survives if there is no '/' */
    if (path_has_protocol(base_path)) {
        protocol_stripped = strchr(base_path, ':');
        if (protocol_stripped) {
            protocol_stripped++;
        }
    }
    p = protocol_stripped ?: base_path;

    /* Find the character following the last directory separator */
    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2;
        p2 = strrchr(base_path, '\\');
        if (!p1 || p2 > p1) {
            p1 = p2;
        }
    }
#endif
    if (p1) {
        p1++;
    } else {
        p1 = base_path;
    }

    /* Keep whichever of the two candidate prefixes is longer */
    if (p1 > p) {
        p = p1;
    }
    len = p - base_path;

    result = g_malloc(len + strlen(filename) + 1);
    memcpy(result, base_path, len);
    strcpy(result + len, filename);

    return result;
}
237
Max Reitz03c320d2017-05-22 21:52:16 +0200238/*
239 * Helper function for bdrv_parse_filename() implementations to remove optional
240 * protocol prefixes (especially "file:") from a filename and for putting the
241 * stripped filename into the options QDict if there is such a prefix.
242 */
243void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
244 QDict *options)
245{
246 if (strstart(filename, prefix, &filename)) {
247 /* Stripping the explicit protocol prefix may result in a protocol
248 * prefix being (wrongly) detected (if the filename contains a colon) */
249 if (path_has_protocol(filename)) {
Markus Armbruster18cf67c2020-12-11 18:11:51 +0100250 GString *fat_filename;
Max Reitz03c320d2017-05-22 21:52:16 +0200251
252 /* This means there is some colon before the first slash; therefore,
253 * this cannot be an absolute path */
254 assert(!path_is_absolute(filename));
255
256 /* And we can thus fix the protocol detection issue by prefixing it
257 * by "./" */
Markus Armbruster18cf67c2020-12-11 18:11:51 +0100258 fat_filename = g_string_new("./");
259 g_string_append(fat_filename, filename);
Max Reitz03c320d2017-05-22 21:52:16 +0200260
Markus Armbruster18cf67c2020-12-11 18:11:51 +0100261 assert(!path_has_protocol(fat_filename->str));
Max Reitz03c320d2017-05-22 21:52:16 +0200262
Markus Armbruster18cf67c2020-12-11 18:11:51 +0100263 qdict_put(options, "filename",
264 qstring_from_gstring(fat_filename));
Max Reitz03c320d2017-05-22 21:52:16 +0200265 } else {
266 /* If no protocol prefix was detected, we can use the shortened
267 * filename as-is */
268 qdict_put_str(options, "filename", filename);
269 }
270 }
271}
272
273
Kevin Wolf9c5e6592017-05-04 18:52:40 +0200274/* Returns whether the image file is opened as read-only. Note that this can
275 * return false and writing to the image file is still not possible because the
276 * image is inactivated. */
Jeff Cody93ed5242017-04-07 16:55:28 -0400277bool bdrv_is_read_only(BlockDriverState *bs)
278{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500279 IO_CODE();
Vladimir Sementsov-Ogievskiy975da072021-05-27 18:40:55 +0300280 return !(bs->open_flags & BDRV_O_RDWR);
Jeff Cody93ed5242017-04-07 16:55:28 -0400281}
282
Kevin Wolf4026f1c2023-09-29 16:51:47 +0200283static int GRAPH_RDLOCK
284bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
285 bool ignore_allow_rdw, Error **errp)
Jeff Codyfe5241b2017-04-07 16:55:25 -0400286{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500287 IO_CODE();
288
Jeff Codye2b82472017-04-07 16:55:26 -0400289 /* Do not set read_only if copy_on_read is enabled */
290 if (bs->copy_on_read && read_only) {
291 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
292 bdrv_get_device_or_node_name(bs));
293 return -EINVAL;
294 }
295
Jeff Codyd6fcdf02017-04-07 16:55:27 -0400296 /* Do not clear read_only if it is prohibited */
Kevin Wolf54a32bf2017-08-03 17:02:58 +0200297 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
298 !ignore_allow_rdw)
299 {
Jeff Codyd6fcdf02017-04-07 16:55:27 -0400300 error_setg(errp, "Node '%s' is read only",
301 bdrv_get_device_or_node_name(bs));
302 return -EPERM;
303 }
304
Jeff Cody45803a02017-04-07 16:55:29 -0400305 return 0;
306}
307
Kevin Wolfeaa24102018-10-12 11:27:41 +0200308/*
309 * Called by a driver that can only provide a read-only image.
310 *
311 * Returns 0 if the node is already read-only or it could switch the node to
312 * read-only because BDRV_O_AUTO_RDONLY is set.
313 *
314 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
315 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
316 * is not NULL, it is used as the error message for the Error object.
317 */
318int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
319 Error **errp)
Jeff Cody45803a02017-04-07 16:55:29 -0400320{
321 int ret = 0;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500322 IO_CODE();
Jeff Cody45803a02017-04-07 16:55:29 -0400323
Kevin Wolfeaa24102018-10-12 11:27:41 +0200324 if (!(bs->open_flags & BDRV_O_RDWR)) {
325 return 0;
326 }
327 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
328 goto fail;
329 }
330
331 ret = bdrv_can_set_read_only(bs, true, false, NULL);
Jeff Cody45803a02017-04-07 16:55:29 -0400332 if (ret < 0) {
Kevin Wolfeaa24102018-10-12 11:27:41 +0200333 goto fail;
Jeff Cody45803a02017-04-07 16:55:29 -0400334 }
335
Kevin Wolfeaa24102018-10-12 11:27:41 +0200336 bs->open_flags &= ~BDRV_O_RDWR;
Kevin Wolfeeae6a52018-10-09 16:57:12 +0200337
Jeff Codye2b82472017-04-07 16:55:26 -0400338 return 0;
Kevin Wolfeaa24102018-10-12 11:27:41 +0200339
340fail:
341 error_setg(errp, "%s", errmsg ?: "Image is read-only");
342 return -EACCES;
Jeff Codyfe5241b2017-04-07 16:55:25 -0400343}
344
Max Reitz645ae7d2019-02-01 20:29:14 +0100345/*
346 * If @backing is empty, this function returns NULL without setting
347 * @errp. In all other cases, NULL will only be returned with @errp
348 * set.
349 *
350 * Therefore, a return value of NULL without @errp set means that
351 * there is no backing file; if @errp is set, there is one but its
352 * absolute filename cannot be generated.
353 */
354char *bdrv_get_full_backing_filename_from_filename(const char *backed,
355 const char *backing,
356 Error **errp)
Max Reitz0a828552014-11-26 17:20:25 +0100357{
Max Reitz645ae7d2019-02-01 20:29:14 +0100358 if (backing[0] == '\0') {
359 return NULL;
360 } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
361 return g_strdup(backing);
Max Reitz9f074292014-11-26 17:20:26 +0100362 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
363 error_setg(errp, "Cannot use relative backing file names for '%s'",
364 backed);
Max Reitz645ae7d2019-02-01 20:29:14 +0100365 return NULL;
Max Reitz0a828552014-11-26 17:20:25 +0100366 } else {
Max Reitz645ae7d2019-02-01 20:29:14 +0100367 return path_combine(backed, backing);
Max Reitz0a828552014-11-26 17:20:25 +0100368 }
369}
370
Max Reitz9f4793d2019-02-01 20:29:16 +0100371/*
372 * If @filename is empty or NULL, this function returns NULL without
373 * setting @errp. In all other cases, NULL will only be returned with
374 * @errp set.
375 */
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +0200376static char * GRAPH_RDLOCK
377bdrv_make_absolute_filename(BlockDriverState *relative_to,
378 const char *filename, Error **errp)
Max Reitz9f4793d2019-02-01 20:29:16 +0100379{
Max Reitz8df68612019-02-01 20:29:23 +0100380 char *dir, *full_name;
Max Reitz9f4793d2019-02-01 20:29:16 +0100381
Max Reitz8df68612019-02-01 20:29:23 +0100382 if (!filename || filename[0] == '\0') {
383 return NULL;
384 } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
385 return g_strdup(filename);
386 }
Max Reitz9f4793d2019-02-01 20:29:16 +0100387
Max Reitz8df68612019-02-01 20:29:23 +0100388 dir = bdrv_dirname(relative_to, errp);
389 if (!dir) {
390 return NULL;
391 }
Max Reitz9f4793d2019-02-01 20:29:16 +0100392
Max Reitz8df68612019-02-01 20:29:23 +0100393 full_name = g_strconcat(dir, filename, NULL);
394 g_free(dir);
395 return full_name;
Max Reitz9f4793d2019-02-01 20:29:16 +0100396}
397
Max Reitz6b6833c2019-02-01 20:29:15 +0100398char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200399{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500400 GLOBAL_STATE_CODE();
Max Reitz9f4793d2019-02-01 20:29:16 +0100401 return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200402}
403
Stefan Hajnoczi0eb72172015-04-28 14:27:51 +0100404void bdrv_register(BlockDriver *bdrv)
405{
Philippe Mathieu-Daudéa15f08d2020-03-18 23:22:35 +0100406 assert(bdrv->format_name);
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500407 GLOBAL_STATE_CODE();
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100408 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000409}
bellardb3380822004-03-14 21:38:54 +0000410
Markus Armbrustere4e99862014-10-07 13:59:03 +0200411BlockDriverState *bdrv_new(void)
412{
413 BlockDriverState *bs;
414 int i;
415
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500416 GLOBAL_STATE_CODE();
417
Markus Armbruster5839e532014-08-19 10:31:08 +0200418 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800419 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800420 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
421 QLIST_INIT(&bs->op_blockers[i]);
422 }
Stefan Hajnoczifa9185f2023-08-08 11:58:52 -0400423 qemu_mutex_init(&bs->reqs_lock);
Paolo Bonzini21198822017-06-05 14:39:03 +0200424 qemu_mutex_init(&bs->dirty_bitmap_mutex);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800425 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200426 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200427
Evgeny Yakovlev3ff2f672016-07-18 22:39:52 +0300428 qemu_co_queue_init(&bs->flush_queue);
429
Hanna Reitz0bc329f2021-08-12 10:41:44 +0200430 qemu_co_mutex_init(&bs->bsc_modify_lock);
431 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
432
Kevin Wolf0f122642018-03-28 18:29:18 +0200433 for (i = 0; i < bdrv_drain_all_count; i++) {
434 bdrv_drained_begin(bs);
435 }
436
Max Reitz2c1d04e2016-01-29 16:36:11 +0100437 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
438
bellardb3380822004-03-14 21:38:54 +0000439 return bs;
440}
441
Marc Mari88d88792016-08-12 09:27:03 -0400442static BlockDriver *bdrv_do_find_format(const char *format_name)
bellardea2384d2004-08-01 21:59:26 +0000443{
444 BlockDriver *drv1;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -0500445 GLOBAL_STATE_CODE();
Marc Mari88d88792016-08-12 09:27:03 -0400446
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100447 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
448 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000449 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100450 }
bellardea2384d2004-08-01 21:59:26 +0000451 }
Marc Mari88d88792016-08-12 09:27:03 -0400452
bellardea2384d2004-08-01 21:59:26 +0000453 return NULL;
454}
455
Marc Mari88d88792016-08-12 09:27:03 -0400456BlockDriver *bdrv_find_format(const char *format_name)
457{
458 BlockDriver *drv1;
459 int i;
460
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500461 GLOBAL_STATE_CODE();
462
Marc Mari88d88792016-08-12 09:27:03 -0400463 drv1 = bdrv_do_find_format(format_name);
464 if (drv1) {
465 return drv1;
466 }
467
468 /* The driver isn't registered, maybe we need to load a module */
469 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
470 if (!strcmp(block_driver_modules[i].format_name, format_name)) {
Claudio Fontanac551fb02022-09-29 11:30:33 +0200471 Error *local_err = NULL;
472 int rv = block_module_load(block_driver_modules[i].library_name,
473 &local_err);
474 if (rv > 0) {
475 return bdrv_do_find_format(format_name);
476 } else if (rv < 0) {
477 error_report_err(local_err);
478 }
Marc Mari88d88792016-08-12 09:27:03 -0400479 break;
480 }
481 }
Claudio Fontanac551fb02022-09-29 11:30:33 +0200482 return NULL;
Marc Mari88d88792016-08-12 09:27:03 -0400483}
484
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +0300485static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100486{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800487 static const char *whitelist_rw[] = {
488 CONFIG_BDRV_RW_WHITELIST
Paolo Bonzini859aef02020-08-04 18:14:26 +0200489 NULL
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800490 };
491 static const char *whitelist_ro[] = {
492 CONFIG_BDRV_RO_WHITELIST
Paolo Bonzini859aef02020-08-04 18:14:26 +0200493 NULL
Markus Armbrustereb852012009-10-27 18:41:44 +0100494 };
495 const char **p;
496
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800497 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100498 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800499 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100500
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800501 for (p = whitelist_rw; *p; p++) {
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +0300502 if (!strcmp(format_name, *p)) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100503 return 1;
504 }
505 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800506 if (read_only) {
507 for (p = whitelist_ro; *p; p++) {
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +0300508 if (!strcmp(format_name, *p)) {
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800509 return 1;
510 }
511 }
512 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100513 return 0;
514}
515
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +0300516int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
517{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500518 GLOBAL_STATE_CODE();
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +0300519 return bdrv_format_is_whitelisted(drv->format_name, read_only);
520}
521
Daniel P. Berrangee6ff69b2016-03-21 14:11:48 +0000522bool bdrv_uses_whitelist(void)
523{
524 return use_bdrv_whitelist;
525}
526
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800527typedef struct CreateCo {
528 BlockDriver *drv;
529 char *filename;
Chunyan Liu83d05212014-06-05 17:20:51 +0800530 QemuOpts *opts;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800531 int ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200532 Error *err;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800533} CreateCo;
534
Emanuele Giuseppe Esposito741443e2022-11-28 09:23:36 -0500535int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
536 QemuOpts *opts, Error **errp)
Emanuele Giuseppe Esposito84bdf212022-11-28 09:23:30 -0500537{
Zhao Liu46ff64a2024-03-12 14:03:37 +0800538 ERRP_GUARD();
Emanuele Giuseppe Esposito84bdf212022-11-28 09:23:30 -0500539 int ret;
540 GLOBAL_STATE_CODE();
Emanuele Giuseppe Esposito84bdf212022-11-28 09:23:30 -0500541
542 if (!drv->bdrv_co_create_opts) {
543 error_setg(errp, "Driver '%s' does not support image creation",
544 drv->format_name);
545 return -ENOTSUP;
546 }
547
548 ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
549 if (ret < 0 && !*errp) {
550 error_setg_errno(errp, -ret, "Could not create image");
551 }
552
553 return ret;
554}
555
Max Reitzfd171462020-01-22 17:45:29 +0100556/**
557 * Helper function for bdrv_create_file_fallback(): Resize @blk to at
558 * least the given @minimum_size.
559 *
560 * On success, return @blk's actual length.
561 * Otherwise, return -errno.
562 */
Paolo Bonzini84569a72023-06-01 13:51:38 +0200563static int64_t coroutine_fn GRAPH_UNLOCKED
564create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
565 Error **errp)
Max Reitzfd171462020-01-22 17:45:29 +0100566{
567 Error *local_err = NULL;
568 int64_t size;
569 int ret;
570
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -0500571 GLOBAL_STATE_CODE();
572
Paolo Bonzini84569a72023-06-01 13:51:38 +0200573 ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
574 &local_err);
Max Reitzfd171462020-01-22 17:45:29 +0100575 if (ret < 0 && ret != -ENOTSUP) {
576 error_propagate(errp, local_err);
577 return ret;
578 }
579
Paolo Bonzini84569a72023-06-01 13:51:38 +0200580 size = blk_co_getlength(blk);
Max Reitzfd171462020-01-22 17:45:29 +0100581 if (size < 0) {
582 error_free(local_err);
583 error_setg_errno(errp, -size,
584 "Failed to inquire the new image file's length");
585 return size;
586 }
587
588 if (size < minimum_size) {
589 /* Need to grow the image, but we failed to do that */
590 error_propagate(errp, local_err);
591 return -ENOTSUP;
592 }
593
594 error_free(local_err);
595 local_err = NULL;
596
597 return size;
598}
599
600/**
601 * Helper function for bdrv_create_file_fallback(): Zero the first
602 * sector to remove any potentially pre-existing image header.
603 */
Paolo Bonzini881a4c52022-09-22 10:49:00 +0200604static int coroutine_fn
605create_file_fallback_zero_first_sector(BlockBackend *blk,
606 int64_t current_size,
607 Error **errp)
Max Reitzfd171462020-01-22 17:45:29 +0100608{
609 int64_t bytes_to_clear;
610 int ret;
611
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -0500612 GLOBAL_STATE_CODE();
613
Max Reitzfd171462020-01-22 17:45:29 +0100614 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
615 if (bytes_to_clear) {
Alberto Fariace47ff22022-10-13 14:37:02 +0200616 ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
Max Reitzfd171462020-01-22 17:45:29 +0100617 if (ret < 0) {
618 error_setg_errno(errp, -ret,
619 "Failed to clear the new image's first sector");
620 return ret;
621 }
622 }
623
624 return 0;
625}
626
Maxim Levitsky5a5e7f82020-03-26 03:12:18 +0200627/**
628 * Simple implementation of bdrv_co_create_opts for protocol drivers
629 * which only support creation via opening a file
630 * (usually existing raw storage device)
631 */
632int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
633 const char *filename,
634 QemuOpts *opts,
635 Error **errp)
Max Reitzfd171462020-01-22 17:45:29 +0100636{
Zhao Liu7b22e052024-03-11 11:37:56 +0800637 ERRP_GUARD();
Max Reitzfd171462020-01-22 17:45:29 +0100638 BlockBackend *blk;
Max Reitzeeea1fa2020-02-25 16:56:18 +0100639 QDict *options;
Max Reitzfd171462020-01-22 17:45:29 +0100640 int64_t size = 0;
641 char *buf = NULL;
642 PreallocMode prealloc;
643 Error *local_err = NULL;
644 int ret;
645
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -0500646 GLOBAL_STATE_CODE();
647
Max Reitzfd171462020-01-22 17:45:29 +0100648 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
649 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
650 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
651 PREALLOC_MODE_OFF, &local_err);
652 g_free(buf);
653 if (local_err) {
654 error_propagate(errp, local_err);
655 return -EINVAL;
656 }
657
658 if (prealloc != PREALLOC_MODE_OFF) {
659 error_setg(errp, "Unsupported preallocation mode '%s'",
660 PreallocMode_str(prealloc));
661 return -ENOTSUP;
662 }
663
Max Reitzeeea1fa2020-02-25 16:56:18 +0100664 options = qdict_new();
Max Reitzfd171462020-01-22 17:45:29 +0100665 qdict_put_str(options, "driver", drv->format_name);
666
Kevin Wolfbe1a7322023-01-26 18:24:31 +0100667 blk = blk_co_new_open(filename, NULL, options,
668 BDRV_O_RDWR | BDRV_O_RESIZE, errp);
Max Reitzfd171462020-01-22 17:45:29 +0100669 if (!blk) {
Hanna Czenczek81624862023-07-20 16:00:24 +0200670 error_prepend(errp, "Protocol driver '%s' does not support creating "
671 "new images, so an existing image must be selected as "
672 "the target; however, opening the given target as an "
673 "existing image failed: ",
Max Reitzfd171462020-01-22 17:45:29 +0100674 drv->format_name);
675 return -EINVAL;
676 }
677
678 size = create_file_fallback_truncate(blk, size, errp);
679 if (size < 0) {
680 ret = size;
681 goto out;
682 }
683
684 ret = create_file_fallback_zero_first_sector(blk, size, errp);
685 if (ret < 0) {
686 goto out;
687 }
688
689 ret = 0;
690out:
Kevin Wolfb2ab5f52023-05-04 13:57:33 +0200691 blk_co_unref(blk);
Max Reitzfd171462020-01-22 17:45:29 +0100692 return ret;
693}
694
Emanuele Giuseppe Esposito2475a0d2022-11-28 09:23:31 -0500695int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
696 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200697{
Stefano Garzarella729222a2021-03-08 17:12:32 +0100698 QemuOpts *protocol_opts;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200699 BlockDriver *drv;
Stefano Garzarella729222a2021-03-08 17:12:32 +0100700 QDict *qdict;
701 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200702
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500703 GLOBAL_STATE_CODE();
704
Max Reitzb65a5e12015-02-05 13:58:12 -0500705 drv = bdrv_find_protocol(filename, true, errp);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200706 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000707 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200708 }
709
Stefano Garzarella729222a2021-03-08 17:12:32 +0100710 if (!drv->create_opts) {
711 error_setg(errp, "Driver '%s' does not support image creation",
712 drv->format_name);
713 return -ENOTSUP;
714 }
715
716 /*
717 * 'opts' contains a QemuOptsList with a combination of format and protocol
718 * default values.
719 *
720 * The format properly removes its options, but the default values remain
721 * in 'opts->list'. So if the protocol has options with the same name
722 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
723 * of the format, since for overlapping options, the format wins.
724 *
725 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
726 * only the set options, and then convert it back to QemuOpts, using the
727 * create_opts of the protocol. So the new QemuOpts, will contain only the
728 * protocol defaults.
729 */
730 qdict = qemu_opts_to_qdict(opts, NULL);
731 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
732 if (protocol_opts == NULL) {
733 ret = -EINVAL;
734 goto out;
735 }
736
Emanuele Giuseppe Esposito2475a0d2022-11-28 09:23:31 -0500737 ret = bdrv_co_create(drv, filename, protocol_opts, errp);
Stefano Garzarella729222a2021-03-08 17:12:32 +0100738out:
739 qemu_opts_del(protocol_opts);
740 qobject_unref(qdict);
741 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200742}
743
Daniel Henrique Barbozae1d7f8b2020-01-30 18:39:05 -0300744int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
745{
746 Error *local_err = NULL;
747 int ret;
748
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500749 IO_CODE();
Daniel Henrique Barbozae1d7f8b2020-01-30 18:39:05 -0300750 assert(bs != NULL);
Kevin Wolf48aef792023-02-03 16:22:00 +0100751 assert_bdrv_graph_readable();
Daniel Henrique Barbozae1d7f8b2020-01-30 18:39:05 -0300752
753 if (!bs->drv) {
754 error_setg(errp, "Block node '%s' is not opened", bs->filename);
755 return -ENOMEDIUM;
756 }
757
758 if (!bs->drv->bdrv_co_delete_file) {
759 error_setg(errp, "Driver '%s' does not support image deletion",
760 bs->drv->format_name);
761 return -ENOTSUP;
762 }
763
764 ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
765 if (ret < 0) {
766 error_propagate(errp, local_err);
767 }
768
769 return ret;
770}
771
Maxim Levitskya890f082020-12-17 19:09:03 +0200772void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
773{
774 Error *local_err = NULL;
775 int ret;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -0500776 IO_CODE();
Maxim Levitskya890f082020-12-17 19:09:03 +0200777
778 if (!bs) {
779 return;
780 }
781
782 ret = bdrv_co_delete_file(bs, &local_err);
783 /*
784 * ENOTSUP will happen if the block driver doesn't support
785 * the 'bdrv_co_delete_file' interface. This is a predictable
786 * scenario and shouldn't be reported back to the user.
787 */
788 if (ret == -ENOTSUP) {
789 error_free(local_err);
790 } else if (ret < 0) {
791 error_report_err(local_err);
792 }
793}
794
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100795/**
796 * Try to get @bs's logical and physical block size.
797 * On success, store them in @bsz struct and return 0.
798 * On failure return -errno.
799 * @bs must not be empty.
800 */
801int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
802{
803 BlockDriver *drv = bs->drv;
Max Reitz93393e62019-06-12 17:03:38 +0200804 BlockDriverState *filtered = bdrv_filter_bs(bs);
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500805 GLOBAL_STATE_CODE();
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100806
807 if (drv && drv->bdrv_probe_blocksizes) {
808 return drv->bdrv_probe_blocksizes(bs, bsz);
Max Reitz93393e62019-06-12 17:03:38 +0200809 } else if (filtered) {
810 return bdrv_probe_blocksizes(filtered, bsz);
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100811 }
812
813 return -ENOTSUP;
814}
815
816/**
817 * Try to get @bs's geometry (cyls, heads, sectors).
818 * On success, store them in @geo struct and return 0.
819 * On failure return -errno.
820 * @bs must not be empty.
821 */
822int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
823{
824 BlockDriver *drv = bs->drv;
Kevin Wolff5a3a272023-10-27 17:53:12 +0200825 BlockDriverState *filtered;
826
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500827 GLOBAL_STATE_CODE();
Kevin Wolff5a3a272023-10-27 17:53:12 +0200828 GRAPH_RDLOCK_GUARD_MAINLOOP();
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100829
830 if (drv && drv->bdrv_probe_geometry) {
831 return drv->bdrv_probe_geometry(bs, geo);
Kevin Wolff5a3a272023-10-27 17:53:12 +0200832 }
833
834 filtered = bdrv_filter_bs(bs);
835 if (filtered) {
Max Reitz93393e62019-06-12 17:03:38 +0200836 return bdrv_probe_geometry(filtered, geo);
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100837 }
838
839 return -ENOTSUP;
840}
841
Jim Meyeringeba25052012-05-28 09:27:54 +0200842/*
843 * Create a uniquely-named empty temporary file.
Bin Meng69fbfff2022-10-10 12:04:31 +0800844 * Return the actual file name used upon success, otherwise NULL.
845 * This string should be freed with g_free() when not needed any longer.
846 *
847 * Note: creating a temporary file for the caller to (re)open is
848 * inherently racy. Use g_file_open_tmp() instead whenever practical.
Jim Meyeringeba25052012-05-28 09:27:54 +0200849 */
Bin Meng69fbfff2022-10-10 12:04:31 +0800850char *create_tmp_file(Error **errp)
Jim Meyeringeba25052012-05-28 09:27:54 +0200851{
bellardea2384d2004-08-01 21:59:26 +0000852 int fd;
blueswir17ccfb2e2008-09-14 06:45:34 +0000853 const char *tmpdir;
Bin Meng69fbfff2022-10-10 12:04:31 +0800854 g_autofree char *filename = NULL;
855
856 tmpdir = g_get_tmp_dir();
857#ifndef _WIN32
858 /*
859 * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot")
860 *
861 * This function is used to create temporary disk images (like -snapshot),
862 * so the files can become very large. /tmp is often a tmpfs where as
863 * /var/tmp is usually on a disk, so more appropriate for disk images.
864 */
865 if (!g_strcmp0(tmpdir, "/tmp")) {
Amit Shah69bef792014-02-26 15:12:37 +0530866 tmpdir = "/var/tmp";
867 }
bellardd5249392004-08-03 21:14:23 +0000868#endif
Bin Meng69fbfff2022-10-10 12:04:31 +0800869
870 filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir);
871 fd = g_mkstemp(filename);
bellardea2384d2004-08-01 21:59:26 +0000872 if (fd < 0) {
Bin Meng69fbfff2022-10-10 12:04:31 +0800873 error_setg_errno(errp, errno, "Could not open temporary file '%s'",
874 filename);
875 return NULL;
bellardea2384d2004-08-01 21:59:26 +0000876 }
Bin Meng6b6471e2022-10-10 12:04:30 +0800877 close(fd);
Bin Meng69fbfff2022-10-10 12:04:31 +0800878
879 return g_steal_pointer(&filename);
Jim Meyeringeba25052012-05-28 09:27:54 +0200880}
bellardea2384d2004-08-01 21:59:26 +0000881
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200882/*
883 * Detect host devices. By convention, /dev/cdrom[N] is always
884 * recognized as a host CDROM.
885 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200886static BlockDriver *find_hdev_driver(const char *filename)
887{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200888 int score_max = 0, score;
889 BlockDriver *drv = NULL, *d;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -0500890 GLOBAL_STATE_CODE();
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200891
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100892 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200893 if (d->bdrv_probe_device) {
894 score = d->bdrv_probe_device(filename);
895 if (score > score_max) {
896 score_max = score;
897 drv = d;
898 }
899 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200900 }
901
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200902 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200903}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200904
Marc Mari88d88792016-08-12 09:27:03 -0400905static BlockDriver *bdrv_do_find_protocol(const char *protocol)
906{
907 BlockDriver *drv1;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -0500908 GLOBAL_STATE_CODE();
Marc Mari88d88792016-08-12 09:27:03 -0400909
910 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
911 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
912 return drv1;
913 }
914 }
915
916 return NULL;
917}
918
Kevin Wolf98289622013-07-10 15:47:39 +0200919BlockDriver *bdrv_find_protocol(const char *filename,
Max Reitzb65a5e12015-02-05 13:58:12 -0500920 bool allow_protocol_prefix,
921 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200922{
923 BlockDriver *drv1;
924 char protocol[128];
925 int len;
926 const char *p;
Marc Mari88d88792016-08-12 09:27:03 -0400927 int i;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200928
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -0500929 GLOBAL_STATE_CODE();
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200930
Christoph Hellwig39508e72010-06-23 12:25:17 +0200931 /*
932 * XXX(hch): we really should not let host device detection
933 * override an explicit protocol specification, but moving this
934 * later breaks access to device names with colons in them.
935 * Thanks to the brain-dead persistent naming schemes on udev-
936 * based Linux systems those actually are quite common.
937 */
938 drv1 = find_hdev_driver(filename);
939 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200940 return drv1;
941 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200942
Kevin Wolf98289622013-07-10 15:47:39 +0200943 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Max Reitzef810432014-12-02 18:32:42 +0100944 return &bdrv_file;
Christoph Hellwig39508e72010-06-23 12:25:17 +0200945 }
Kevin Wolf98289622013-07-10 15:47:39 +0200946
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000947 p = strchr(filename, ':');
948 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200949 len = p - filename;
950 if (len > sizeof(protocol) - 1)
951 len = sizeof(protocol) - 1;
952 memcpy(protocol, filename, len);
953 protocol[len] = '\0';
Marc Mari88d88792016-08-12 09:27:03 -0400954
955 drv1 = bdrv_do_find_protocol(protocol);
956 if (drv1) {
957 return drv1;
958 }
959
960 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
961 if (block_driver_modules[i].protocol_name &&
962 !strcmp(block_driver_modules[i].protocol_name, protocol)) {
Claudio Fontanac551fb02022-09-29 11:30:33 +0200963 int rv = block_module_load(block_driver_modules[i].library_name, errp);
964 if (rv > 0) {
965 drv1 = bdrv_do_find_protocol(protocol);
966 } else if (rv < 0) {
967 return NULL;
968 }
Marc Mari88d88792016-08-12 09:27:03 -0400969 break;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200970 }
971 }
Max Reitzb65a5e12015-02-05 13:58:12 -0500972
Marc Mari88d88792016-08-12 09:27:03 -0400973 if (!drv1) {
974 error_setg(errp, "Unknown protocol '%s'", protocol);
975 }
976 return drv1;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200977}
978
Markus Armbrusterc6684242014-11-20 16:27:10 +0100979/*
980 * Guess image format by probing its contents.
981 * This is not a good idea when your image is raw (CVE-2008-2004), but
982 * we do it anyway for backward compatibility.
983 *
984 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100985 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
986 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100987 * @filename is its filename.
988 *
989 * For all block drivers, call the bdrv_probe() method to get its
990 * probing score.
991 * Return the first block driver with the highest probing score.
992 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100993BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
994 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100995{
996 int score_max = 0, score;
997 BlockDriver *drv = NULL, *d;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -0500998 IO_CODE();
Markus Armbrusterc6684242014-11-20 16:27:10 +0100999
1000 QLIST_FOREACH(d, &bdrv_drivers, list) {
1001 if (d->bdrv_probe) {
1002 score = d->bdrv_probe(buf, buf_size, filename);
1003 if (score > score_max) {
1004 score_max = score;
1005 drv = d;
1006 }
1007 }
1008 }
1009
1010 return drv;
1011}
1012
Kevin Wolf5696c6e2017-02-17 18:39:24 +01001013static int find_image_format(BlockBackend *file, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +02001014 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001015{
Markus Armbrusterc6684242014-11-20 16:27:10 +01001016 BlockDriver *drv;
Kevin Wolf7cddd372014-11-20 16:27:11 +01001017 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
Kevin Wolff500a6d2012-11-12 17:35:27 +01001018 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -07001019
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001020 GLOBAL_STATE_CODE();
1021
Kevin Wolf08a00552010-06-01 18:37:31 +02001022 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Kevin Wolf5696c6e2017-02-17 18:39:24 +01001023 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
Max Reitzef810432014-12-02 18:32:42 +01001024 *pdrv = &bdrv_raw;
Stefan Weilc98ac352010-07-21 21:51:51 +02001025 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -07001026 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -07001027
Alberto Fariaa9262f52022-07-05 17:15:11 +01001028 ret = blk_pread(file, 0, sizeof(buf), buf, 0);
bellard83f64092006-08-01 16:21:11 +00001029 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001030 error_setg_errno(errp, -ret, "Could not read image for determining its "
1031 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +02001032 *pdrv = NULL;
1033 return ret;
bellard83f64092006-08-01 16:21:11 +00001034 }
1035
Alberto Fariabf5b16f2022-07-05 17:15:09 +01001036 drv = bdrv_probe_all(buf, sizeof(buf), filename);
Stefan Weilc98ac352010-07-21 21:51:51 +02001037 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001038 error_setg(errp, "Could not determine image format: No compatible "
1039 "driver found");
Alberto Fariabf5b16f2022-07-05 17:15:09 +01001040 *pdrv = NULL;
1041 return -ENOENT;
Stefan Weilc98ac352010-07-21 21:51:51 +02001042 }
Alberto Fariabf5b16f2022-07-05 17:15:09 +01001043
Stefan Weilc98ac352010-07-21 21:51:51 +02001044 *pdrv = drv;
Alberto Fariabf5b16f2022-07-05 17:15:09 +01001045 return 0;
bellardea2384d2004-08-01 21:59:26 +00001046}
1047
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001048/**
1049 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +02001050 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001051 */
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01001052int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
1053 int64_t hint)
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001054{
1055 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05001056 IO_CODE();
Kevin Wolf8ab81402023-02-03 16:22:02 +01001057 assert_bdrv_graph_readable();
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001058
Max Reitzd470ad42017-11-10 21:31:09 +01001059 if (!drv) {
1060 return -ENOMEDIUM;
1061 }
1062
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01001063 /* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */
Dimitris Aragiorgisb192af82015-06-23 13:44:56 +03001064 if (bdrv_is_sg(bs))
Nicholas Bellinger396759a2010-05-17 09:46:04 -07001065 return 0;
1066
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001067 /* query actual device if possible, otherwise just trust the hint */
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01001068 if (drv->bdrv_co_getlength) {
1069 int64_t length = drv->bdrv_co_getlength(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001070 if (length < 0) {
1071 return length;
1072 }
Fam Zheng7e382002013-11-06 19:48:06 +08001073 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001074 }
1075
1076 bs->total_sectors = hint;
Vladimir Sementsov-Ogievskiy8b117002020-12-04 01:27:13 +03001077
1078 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
1079 return -EFBIG;
1080 }
1081
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001082 return 0;
1083}
1084
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001085/**
Kevin Wolfcddff5b2015-11-16 16:43:27 +01001086 * Combines a QDict of new block driver @options with any missing options taken
1087 * from @old_options, so that leaving out an option defaults to its old value.
1088 */
1089static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1090 QDict *old_options)
1091{
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05001092 GLOBAL_STATE_CODE();
Kevin Wolfcddff5b2015-11-16 16:43:27 +01001093 if (bs->drv && bs->drv->bdrv_join_options) {
1094 bs->drv->bdrv_join_options(options, old_options);
1095 } else {
1096 qdict_join(options, old_options, false);
1097 }
1098}
1099
Alberto Garcia543770b2018-09-06 12:37:09 +03001100static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1101 int open_flags,
1102 Error **errp)
1103{
1104 Error *local_err = NULL;
1105 char *value = qemu_opt_get_del(opts, "detect-zeroes");
1106 BlockdevDetectZeroesOptions detect_zeroes =
1107 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1108 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001109 GLOBAL_STATE_CODE();
Alberto Garcia543770b2018-09-06 12:37:09 +03001110 g_free(value);
1111 if (local_err) {
1112 error_propagate(errp, local_err);
1113 return detect_zeroes;
1114 }
1115
1116 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1117 !(open_flags & BDRV_O_UNMAP))
1118 {
1119 error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1120 "without setting discard operation to unmap");
1121 }
1122
1123 return detect_zeroes;
1124}
1125
Kevin Wolfcddff5b2015-11-16 16:43:27 +01001126/**
Aarushi Mehtaf80f2672020-01-20 14:18:50 +00001127 * Set open flags for aio engine
1128 *
1129 * Return 0 on success, -1 if the engine specified is invalid
1130 */
1131int bdrv_parse_aio(const char *mode, int *flags)
1132{
1133 if (!strcmp(mode, "threads")) {
1134 /* do nothing, default */
1135 } else if (!strcmp(mode, "native")) {
1136 *flags |= BDRV_O_NATIVE_AIO;
1137#ifdef CONFIG_LINUX_IO_URING
1138 } else if (!strcmp(mode, "io_uring")) {
1139 *flags |= BDRV_O_IO_URING;
1140#endif
1141 } else {
1142 return -1;
1143 }
1144
1145 return 0;
1146}
1147
1148/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01001149 * Set open flags for a given discard mode
1150 *
1151 * Return 0 on success, -1 if the discard mode was invalid.
1152 */
1153int bdrv_parse_discard_flags(const char *mode, int *flags)
1154{
1155 *flags &= ~BDRV_O_UNMAP;
1156
1157 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1158 /* do nothing */
1159 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1160 *flags |= BDRV_O_UNMAP;
1161 } else {
1162 return -1;
1163 }
1164
1165 return 0;
1166}
1167
1168/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001169 * Set open flags for a given cache mode
1170 *
1171 * Return 0 on success, -1 if the cache mode was invalid.
1172 */
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001173int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001174{
1175 *flags &= ~BDRV_O_CACHE_MASK;
1176
1177 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001178 *writethrough = false;
1179 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001180 } else if (!strcmp(mode, "directsync")) {
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001181 *writethrough = true;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001182 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001183 } else if (!strcmp(mode, "writeback")) {
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001184 *writethrough = false;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001185 } else if (!strcmp(mode, "unsafe")) {
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001186 *writethrough = false;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001187 *flags |= BDRV_O_NO_FLUSH;
1188 } else if (!strcmp(mode, "writethrough")) {
Kevin Wolf53e8ae02016-03-18 15:36:58 +01001189 *writethrough = true;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +01001190 } else {
1191 return -1;
1192 }
1193
1194 return 0;
1195}
1196
Kevin Wolfb5411552017-01-17 15:56:16 +01001197static char *bdrv_child_get_parent_desc(BdrvChild *c)
1198{
1199 BlockDriverState *parent = c->opaque;
Vladimir Sementsov-Ogievskiy2c0a3ac2021-06-01 10:52:15 +03001200 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
Kevin Wolfb5411552017-01-17 15:56:16 +01001201}
1202
Emanuele Giuseppe Espositod05ab382023-09-29 16:51:40 +02001203static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child)
Kevin Wolf20018e12016-05-23 18:46:59 +02001204{
1205 BlockDriverState *bs = child->opaque;
Kevin Wolfa82a3bd2022-11-18 18:41:07 +01001206 bdrv_do_drained_begin_quiesce(bs, NULL);
Kevin Wolf20018e12016-05-23 18:46:59 +02001207}
1208
Emanuele Giuseppe Espositod05ab382023-09-29 16:51:40 +02001209static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child)
Kevin Wolf89bd0302018-03-22 14:11:20 +01001210{
1211 BlockDriverState *bs = child->opaque;
Kevin Wolf299403a2022-11-18 18:41:05 +01001212 return bdrv_drain_poll(bs, NULL, false);
Kevin Wolf89bd0302018-03-22 14:11:20 +01001213}
1214
Emanuele Giuseppe Espositod05ab382023-09-29 16:51:40 +02001215static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child)
Kevin Wolf20018e12016-05-23 18:46:59 +02001216{
1217 BlockDriverState *bs = child->opaque;
Kevin Wolf2f65df62022-11-18 18:40:59 +01001218 bdrv_drained_end(bs);
Kevin Wolf20018e12016-05-23 18:46:59 +02001219}
1220
Kevin Wolf38701b62017-05-04 18:52:39 +02001221static int bdrv_child_cb_inactivate(BdrvChild *child)
1222{
1223 BlockDriverState *bs = child->opaque;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001224 GLOBAL_STATE_CODE();
Kevin Wolf38701b62017-05-04 18:52:39 +02001225 assert(bs->open_flags & BDRV_O_INACTIVE);
1226 return 0;
1227}
1228
Fiona Ebner844d5502025-05-30 17:10:43 +02001229static bool GRAPH_RDLOCK
1230bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
1231 GHashTable *visited, Transaction *tran,
1232 Error **errp)
Kevin Wolf5d231842019-05-06 19:17:56 +02001233{
1234 BlockDriverState *bs = child->opaque;
Emanuele Giuseppe Esposito27633e72022-10-25 04:49:47 -04001235 return bdrv_change_aio_context(bs, ctx, visited, tran, errp);
Kevin Wolf53a7d042019-05-06 19:17:59 +02001236}
1237
Kevin Wolf0b50cc82014-04-11 21:29:52 +02001238/*
Kevin Wolf73176be2016-03-07 13:02:15 +01001239 * Returns the options and flags that a temporary snapshot should get, based on
1240 * the originally requested flags (the originally requested image will have
1241 * flags like a backing file)
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001242 */
Kevin Wolf73176be2016-03-07 13:02:15 +01001243static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1244 int parent_flags, QDict *parent_options)
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001245{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001246 GLOBAL_STATE_CODE();
Kevin Wolf73176be2016-03-07 13:02:15 +01001247 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1248
1249 /* For temporary files, unconditional cache=unsafe is fine */
Kevin Wolf73176be2016-03-07 13:02:15 +01001250 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1251 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
Kevin Wolf41869042016-06-16 12:59:30 +02001252
Kevin Wolf3f486862019-04-04 17:04:43 +02001253 /* Copy the read-only and discard options from the parent */
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001254 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
Kevin Wolf3f486862019-04-04 17:04:43 +02001255 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001256
Kevin Wolf41869042016-06-16 12:59:30 +02001257 /* aio=native doesn't work for cache.direct=off, so disable it for the
1258 * temporary snapshot */
1259 *child_flags &= ~BDRV_O_NATIVE_AIO;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001260}
1261
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02001262static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001263{
1264 BlockDriverState *parent = c->opaque;
1265 BlockDriverState *backing_hd = c->bs;
1266
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001267 GLOBAL_STATE_CODE();
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001268 assert(!parent->backing_blocker);
1269 error_setg(&parent->backing_blocker,
1270 "node is used as backing hd of '%s'",
1271 bdrv_get_device_or_node_name(parent));
1272
Max Reitzf30c66b2019-02-01 20:29:05 +01001273 bdrv_refresh_filename(backing_hd);
1274
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001275 parent->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001276
1277 bdrv_op_block_all(backing_hd, parent->backing_blocker);
1278 /* Otherwise we won't be able to commit or stream */
1279 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1280 parent->backing_blocker);
1281 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1282 parent->backing_blocker);
1283 /*
1284 * We do backup in 3 ways:
1285 * 1. drive backup
1286 * The target bs is new opened, and the source is top BDS
1287 * 2. blockdev backup
1288 * Both the source and the target are top BDSes.
1289 * 3. internal backup(used for block replication)
1290 * Both the source and the target are backing file
1291 *
1292 * In case 1 and 2, neither the source nor the target is the backing file.
1293 * In case 3, we will block the top BDS, so there is only one block job
1294 * for the top BDS and its backing chain.
1295 */
1296 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1297 parent->backing_blocker);
1298 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1299 parent->backing_blocker);
Max Reitzca2f1232020-05-13 13:05:22 +02001300}
Kevin Wolfd736f112017-12-18 16:05:48 +01001301
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001302static void bdrv_backing_detach(BdrvChild *c)
1303{
1304 BlockDriverState *parent = c->opaque;
1305
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001306 GLOBAL_STATE_CODE();
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01001307 assert(parent->backing_blocker);
1308 bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1309 error_free(parent->backing_blocker);
1310 parent->backing_blocker = NULL;
Max Reitz48e08282020-05-13 13:05:23 +02001311}
Kevin Wolfd736f112017-12-18 16:05:48 +01001312
Kevin Wolf6858eba2017-06-29 19:32:21 +02001313static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
Peter Krempa4b028cb2023-12-05 18:14:41 +01001314 const char *filename,
1315 bool backing_mask_protocol,
1316 Error **errp)
Kevin Wolf6858eba2017-06-29 19:32:21 +02001317{
1318 BlockDriverState *parent = c->opaque;
Alberto Garciae94d3db2018-11-12 16:00:34 +02001319 bool read_only = bdrv_is_read_only(parent);
Kevin Wolf6858eba2017-06-29 19:32:21 +02001320 int ret;
Peter Krempa4b028cb2023-12-05 18:14:41 +01001321 const char *format_name;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001322 GLOBAL_STATE_CODE();
Kevin Wolf6858eba2017-06-29 19:32:21 +02001323
Alberto Garciae94d3db2018-11-12 16:00:34 +02001324 if (read_only) {
1325 ret = bdrv_reopen_set_read_only(parent, false, errp);
Kevin Wolf61f09ce2017-09-19 16:22:54 +02001326 if (ret < 0) {
1327 return ret;
1328 }
1329 }
1330
Peter Krempa4b028cb2023-12-05 18:14:41 +01001331 if (base->drv) {
1332 /*
1333 * If the new base image doesn't have a format driver layer, which we
1334 * detect by the fact that @base is a protocol driver, we record
1335 * 'raw' as the format instead of putting the protocol name as the
1336 * backing format
1337 */
1338 if (backing_mask_protocol && base->drv->protocol_name) {
1339 format_name = "raw";
1340 } else {
1341 format_name = base->drv->format_name;
1342 }
1343 } else {
1344 format_name = "";
1345 }
1346
1347 ret = bdrv_change_backing_file(parent, filename, format_name, false);
Kevin Wolf6858eba2017-06-29 19:32:21 +02001348 if (ret < 0) {
Kevin Wolf64730692017-11-06 17:52:58 +01001349 error_setg_errno(errp, -ret, "Could not update backing file link");
Kevin Wolf6858eba2017-06-29 19:32:21 +02001350 }
1351
Alberto Garciae94d3db2018-11-12 16:00:34 +02001352 if (read_only) {
1353 bdrv_reopen_set_read_only(parent, true, NULL);
Kevin Wolf61f09ce2017-09-19 16:22:54 +02001354 }
1355
Kevin Wolf6858eba2017-06-29 19:32:21 +02001356 return ret;
1357}
1358
Max Reitzfae8bd32020-05-13 13:05:20 +02001359/*
1360 * Returns the options and flags that a generic child of a BDS should
1361 * get, based on the given options and flags for the parent BDS.
1362 */
Max Reitz00ff7ff2020-05-13 13:05:21 +02001363static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1364 int *child_flags, QDict *child_options,
1365 int parent_flags, QDict *parent_options)
Max Reitzfae8bd32020-05-13 13:05:20 +02001366{
1367 int flags = parent_flags;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001368 GLOBAL_STATE_CODE();
Max Reitzfae8bd32020-05-13 13:05:20 +02001369
1370 /*
1371 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1372 * Generally, the question to answer is: Should this child be
1373 * format-probed by default?
1374 */
1375
1376 /*
1377 * Pure and non-filtered data children of non-format nodes should
1378 * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1379 * set). This only affects a very limited set of drivers (namely
1380 * quorum and blkverify when this comment was written).
1381 * Force-clear BDRV_O_PROTOCOL then.
1382 */
1383 if (!parent_is_format &&
1384 (role & BDRV_CHILD_DATA) &&
1385 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1386 {
1387 flags &= ~BDRV_O_PROTOCOL;
1388 }
1389
1390 /*
1391 * All children of format nodes (except for COW children) and all
1392 * metadata children in general should never be format-probed.
1393 * Force-set BDRV_O_PROTOCOL then.
1394 */
1395 if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1396 (role & BDRV_CHILD_METADATA))
1397 {
1398 flags |= BDRV_O_PROTOCOL;
1399 }
1400
1401 /*
1402 * If the cache mode isn't explicitly set, inherit direct and no-flush from
1403 * the parent.
1404 */
1405 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1406 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1407 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1408
1409 if (role & BDRV_CHILD_COW) {
1410 /* backing files are opened read-only by default */
1411 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1412 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1413 } else {
1414 /* Inherit the read-only option from the parent if it's not set */
1415 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1416 qdict_copy_default(child_options, parent_options,
1417 BDRV_OPT_AUTO_READ_ONLY);
1418 }
1419
1420 /*
1421 * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1422 * can default to enable it on lower layers regardless of the
1423 * parent option.
1424 */
1425 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1426
1427 /* Clear flags that only apply to the top layer */
1428 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1429
1430 if (role & BDRV_CHILD_METADATA) {
1431 flags &= ~BDRV_O_NO_IO;
1432 }
1433 if (role & BDRV_CHILD_COW) {
1434 flags &= ~BDRV_O_TEMPORARY;
1435 }
1436
1437 *child_flags = flags;
1438}
1439
Kevin Wolf303de472022-12-07 14:18:35 +01001440static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
Max Reitzca2f1232020-05-13 13:05:22 +02001441{
1442 BlockDriverState *bs = child->opaque;
1443
Emanuele Giuseppe Esposito3f35f822022-12-07 14:18:33 +01001444 assert_bdrv_graph_writable();
Hanna Reitza2253692021-11-15 15:53:58 +01001445 QLIST_INSERT_HEAD(&bs->children, child, next);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03001446 if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
1447 /*
1448 * Here we handle filters and block/raw-format.c when it behave like
1449 * filter. They generally have a single PRIMARY child, which is also the
1450 * FILTERED child, and that they may have multiple more children, which
1451 * are neither PRIMARY nor FILTERED. And never we have a COW child here.
1452 * So bs->file will be the PRIMARY child, unless the PRIMARY child goes
1453 * into bs->backing on exceptional cases; and bs->backing will be
1454 * nothing else.
1455 */
1456 assert(!(child->role & BDRV_CHILD_COW));
1457 if (child->role & BDRV_CHILD_PRIMARY) {
1458 assert(child->role & BDRV_CHILD_FILTERED);
1459 assert(!bs->backing);
1460 assert(!bs->file);
Hanna Reitza2253692021-11-15 15:53:58 +01001461
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03001462 if (bs->drv->filtered_child_is_backing) {
1463 bs->backing = child;
1464 } else {
1465 bs->file = child;
1466 }
1467 } else {
1468 assert(!(child->role & BDRV_CHILD_FILTERED));
1469 }
1470 } else if (child->role & BDRV_CHILD_COW) {
1471 assert(bs->drv->supports_backing);
1472 assert(!(child->role & BDRV_CHILD_PRIMARY));
1473 assert(!bs->backing);
1474 bs->backing = child;
Max Reitzca2f1232020-05-13 13:05:22 +02001475 bdrv_backing_attach(child);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03001476 } else if (child->role & BDRV_CHILD_PRIMARY) {
1477 assert(!bs->file);
1478 bs->file = child;
Max Reitzca2f1232020-05-13 13:05:22 +02001479 }
Max Reitzca2f1232020-05-13 13:05:22 +02001480}
1481
Kevin Wolf303de472022-12-07 14:18:35 +01001482static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
Max Reitz48e08282020-05-13 13:05:23 +02001483{
1484 BlockDriverState *bs = child->opaque;
1485
1486 if (child->role & BDRV_CHILD_COW) {
1487 bdrv_backing_detach(child);
1488 }
1489
Emanuele Giuseppe Esposito3f35f822022-12-07 14:18:33 +01001490 assert_bdrv_graph_writable();
Hanna Reitza2253692021-11-15 15:53:58 +01001491 QLIST_REMOVE(child, next);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03001492 if (child == bs->backing) {
1493 assert(child != bs->file);
1494 bs->backing = NULL;
1495 } else if (child == bs->file) {
1496 bs->file = NULL;
1497 }
Max Reitz48e08282020-05-13 13:05:23 +02001498}
1499
Max Reitz43483552020-05-13 13:05:24 +02001500static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
Peter Krempa4b028cb2023-12-05 18:14:41 +01001501 const char *filename,
1502 bool backing_mask_protocol,
1503 Error **errp)
Max Reitz43483552020-05-13 13:05:24 +02001504{
1505 if (c->role & BDRV_CHILD_COW) {
Peter Krempa4b028cb2023-12-05 18:14:41 +01001506 return bdrv_backing_update_filename(c, base, filename,
1507 backing_mask_protocol,
1508 errp);
Max Reitz43483552020-05-13 13:05:24 +02001509 }
1510 return 0;
1511}
1512
Vladimir Sementsov-Ogievskiyfb62b582021-05-24 13:12:56 +03001513AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
Vladimir Sementsov-Ogievskiy3ca1f322021-04-28 18:17:33 +03001514{
1515 BlockDriverState *bs = c->opaque;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05001516 IO_CODE();
Vladimir Sementsov-Ogievskiy3ca1f322021-04-28 18:17:33 +03001517
1518 return bdrv_get_aio_context(bs);
1519}
1520
Max Reitz43483552020-05-13 13:05:24 +02001521const BdrvChildClass child_of_bds = {
1522 .parent_is_bds = true,
1523 .get_parent_desc = bdrv_child_get_parent_desc,
1524 .inherit_options = bdrv_inherited_options,
1525 .drained_begin = bdrv_child_cb_drained_begin,
1526 .drained_poll = bdrv_child_cb_drained_poll,
1527 .drained_end = bdrv_child_cb_drained_end,
1528 .attach = bdrv_child_cb_attach,
1529 .detach = bdrv_child_cb_detach,
1530 .inactivate = bdrv_child_cb_inactivate,
Emanuele Giuseppe Esposito27633e72022-10-25 04:49:47 -04001531 .change_aio_ctx = bdrv_child_cb_change_aio_ctx,
Max Reitz43483552020-05-13 13:05:24 +02001532 .update_filename = bdrv_child_cb_update_filename,
Vladimir Sementsov-Ogievskiyfb62b582021-05-24 13:12:56 +03001533 .get_parent_aio_context = child_of_bds_get_parent_aio_context,
Max Reitz43483552020-05-13 13:05:24 +02001534};
1535
Vladimir Sementsov-Ogievskiy3ca1f322021-04-28 18:17:33 +03001536AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
1537{
Hanna Reitzd5f8d792022-11-07 16:13:19 +01001538 IO_CODE();
Vladimir Sementsov-Ogievskiy3ca1f322021-04-28 18:17:33 +03001539 return c->klass->get_parent_aio_context(c);
1540}
1541
Kevin Wolf7b272452012-11-12 17:05:39 +01001542static int bdrv_open_flags(BlockDriverState *bs, int flags)
1543{
Kevin Wolf61de4c62016-03-18 17:46:45 +01001544 int open_flags = flags;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001545 GLOBAL_STATE_CODE();
Kevin Wolf7b272452012-11-12 17:05:39 +01001546
1547 /*
1548 * Clear flags that are internal to the block layer before opening the
1549 * image.
1550 */
Kevin Wolf20cca272014-06-04 14:33:27 +02001551 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +01001552
Kevin Wolf7b272452012-11-12 17:05:39 +01001553 return open_flags;
1554}
1555
Kevin Wolf91a097e2015-05-08 17:49:53 +02001556static void update_flags_from_options(int *flags, QemuOpts *opts)
1557{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001558 GLOBAL_STATE_CODE();
1559
Alberto Garcia2a3d4332018-11-12 16:00:48 +02001560 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
Kevin Wolf91a097e2015-05-08 17:49:53 +02001561
Alberto Garcia57f9db92018-09-06 12:37:06 +03001562 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
Kevin Wolf91a097e2015-05-08 17:49:53 +02001563 *flags |= BDRV_O_NO_FLUSH;
1564 }
1565
Alberto Garcia57f9db92018-09-06 12:37:06 +03001566 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
Kevin Wolf91a097e2015-05-08 17:49:53 +02001567 *flags |= BDRV_O_NOCACHE;
1568 }
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001569
Alberto Garcia57f9db92018-09-06 12:37:06 +03001570 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001571 *flags |= BDRV_O_RDWR;
1572 }
1573
Kevin Wolfe35bdc12018-10-05 18:57:40 +02001574 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1575 *flags |= BDRV_O_AUTO_RDONLY;
1576 }
Kevin Wolffaecd162025-02-04 22:13:58 +01001577
1578 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_ACTIVE, true)) {
1579 *flags |= BDRV_O_INACTIVE;
1580 }
Kevin Wolf91a097e2015-05-08 17:49:53 +02001581}
1582
1583static void update_options_from_flags(QDict *options, int flags)
1584{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001585 GLOBAL_STATE_CODE();
Kevin Wolf91a097e2015-05-08 17:49:53 +02001586 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
Eric Blake46f5ac22017-04-27 16:58:17 -05001587 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
Kevin Wolf91a097e2015-05-08 17:49:53 +02001588 }
1589 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
Eric Blake46f5ac22017-04-27 16:58:17 -05001590 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1591 flags & BDRV_O_NO_FLUSH);
Kevin Wolf91a097e2015-05-08 17:49:53 +02001592 }
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001593 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
Eric Blake46f5ac22017-04-27 16:58:17 -05001594 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001595 }
Kevin Wolfe35bdc12018-10-05 18:57:40 +02001596 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1597 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1598 flags & BDRV_O_AUTO_RDONLY);
1599 }
Kevin Wolf91a097e2015-05-08 17:49:53 +02001600}
1601
Kevin Wolf636ea372014-01-24 14:11:52 +01001602static void bdrv_assign_node_name(BlockDriverState *bs,
1603 const char *node_name,
1604 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +01001605{
Jeff Cody15489c72015-10-12 19:36:50 -04001606 char *gen_node_name = NULL;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05001607 GLOBAL_STATE_CODE();
Benoît Canet6913c0c2014-01-23 21:31:33 +01001608
Jeff Cody15489c72015-10-12 19:36:50 -04001609 if (!node_name) {
1610 node_name = gen_node_name = id_generate(ID_BLOCK);
1611 } else if (!id_wellformed(node_name)) {
1612 /*
1613 * Check for empty string or invalid characters, but not if it is
1614 * generated (generated names use characters not available to the user)
1615 */
Connor Kuehl785ec4b2021-03-05 09:19:28 -06001616 error_setg(errp, "Invalid node-name: '%s'", node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +01001617 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +01001618 }
1619
Benoît Canet0c5e94e2014-02-12 17:15:07 +01001620 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +02001621 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +01001622 error_setg(errp, "node-name=%s is conflicting with a device id",
1623 node_name);
Jeff Cody15489c72015-10-12 19:36:50 -04001624 goto out;
Benoît Canet0c5e94e2014-02-12 17:15:07 +01001625 }
1626
Benoît Canet6913c0c2014-01-23 21:31:33 +01001627 /* takes care of avoiding duplicates node names */
1628 if (bdrv_find_node(node_name)) {
Connor Kuehl785ec4b2021-03-05 09:19:28 -06001629 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
Jeff Cody15489c72015-10-12 19:36:50 -04001630 goto out;
Benoît Canet6913c0c2014-01-23 21:31:33 +01001631 }
1632
Kevin Wolf824808d2018-07-04 13:28:29 +02001633 /* Make sure that the node name isn't truncated */
1634 if (strlen(node_name) >= sizeof(bs->node_name)) {
1635 error_setg(errp, "Node name too long");
1636 goto out;
1637 }
1638
Benoît Canet6913c0c2014-01-23 21:31:33 +01001639 /* copy node name into the bs and insert it into the graph list */
1640 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1641 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Jeff Cody15489c72015-10-12 19:36:50 -04001642out:
1643 g_free(gen_node_name);
Benoît Canet6913c0c2014-01-23 21:31:33 +01001644}
1645
Kevin Wolf1a30b0f2023-05-04 13:57:38 +02001646static int no_coroutine_fn GRAPH_UNLOCKED
1647bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
1648 QDict *options, int open_flags, Error **errp)
Kevin Wolf01a56502017-01-18 15:51:56 +01001649{
1650 Error *local_err = NULL;
Kevin Wolf0f122642018-03-28 18:29:18 +02001651 int i, ret;
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05001652 GLOBAL_STATE_CODE();
Kevin Wolf01a56502017-01-18 15:51:56 +01001653
1654 bdrv_assign_node_name(bs, node_name, &local_err);
1655 if (local_err) {
1656 error_propagate(errp, local_err);
1657 return -EINVAL;
1658 }
1659
1660 bs->drv = drv;
1661 bs->opaque = g_malloc0(drv->instance_size);
1662
Paolo Bonziniae8b45d2022-11-24 16:29:06 +01001663 assert(!drv->bdrv_needs_filename || bs->filename[0]);
Paolo Bonzini44b424d2022-11-24 16:22:22 +01001664 if (drv->bdrv_open) {
Kevin Wolf01a56502017-01-18 15:51:56 +01001665 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
Kevin Wolf680c7f92017-01-18 17:16:41 +01001666 } else {
1667 ret = 0;
Kevin Wolf01a56502017-01-18 15:51:56 +01001668 }
1669
1670 if (ret < 0) {
1671 if (local_err) {
1672 error_propagate(errp, local_err);
1673 } else if (bs->filename[0]) {
1674 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1675 } else {
1676 error_setg_errno(errp, -ret, "Could not open image");
1677 }
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001678 goto open_failed;
Kevin Wolf01a56502017-01-18 15:51:56 +01001679 }
1680
Stefan Hajnoczie8b65352022-10-13 14:59:01 -04001681 assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
1682 assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
1683
1684 /*
1685 * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
1686 * drivers that pass read/write requests through to a child the trouble of
1687 * declaring support explicitly.
1688 *
1689 * Drivers must not propagate this flag accidentally when they initiate I/O
1690 * to a bounce buffer. That case should be rare though.
1691 */
1692 bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
1693 bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
1694
Emanuele Giuseppe Espositoc0579602023-01-13 21:42:03 +01001695 ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
Kevin Wolf01a56502017-01-18 15:51:56 +01001696 if (ret < 0) {
1697 error_setg_errno(errp, -ret, "Could not refresh total sector count");
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001698 return ret;
Kevin Wolf01a56502017-01-18 15:51:56 +01001699 }
1700
Kevin Wolfe19b1572023-05-04 13:57:50 +02001701 bdrv_graph_rdlock_main_loop();
Vladimir Sementsov-Ogievskiy1e4c7972021-04-28 18:17:55 +03001702 bdrv_refresh_limits(bs, NULL, &local_err);
Kevin Wolfe19b1572023-05-04 13:57:50 +02001703 bdrv_graph_rdunlock_main_loop();
1704
Kevin Wolf01a56502017-01-18 15:51:56 +01001705 if (local_err) {
1706 error_propagate(errp, local_err);
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001707 return -EINVAL;
Kevin Wolf01a56502017-01-18 15:51:56 +01001708 }
1709
1710 assert(bdrv_opt_mem_align(bs) != 0);
1711 assert(bdrv_min_mem_align(bs) != 0);
1712 assert(is_power_of_2(bs->bl.request_alignment));
1713
Kevin Wolf0f122642018-03-28 18:29:18 +02001714 for (i = 0; i < bs->quiesce_counter; i++) {
Kevin Wolf5e8ac212022-11-18 18:40:58 +01001715 if (drv->bdrv_drain_begin) {
1716 drv->bdrv_drain_begin(bs);
Kevin Wolf0f122642018-03-28 18:29:18 +02001717 }
1718 }
1719
Kevin Wolf01a56502017-01-18 15:51:56 +01001720 return 0;
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001721open_failed:
1722 bs->drv = NULL;
Kevin Wolf1f051dc2023-10-27 17:53:33 +02001723
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05001724 bdrv_graph_wrlock();
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001725 if (bs->file != NULL) {
1726 bdrv_unref_child(bs, bs->file);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03001727 assert(!bs->file);
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001728 }
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05001729 bdrv_graph_wrunlock();
Kevin Wolf1f051dc2023-10-27 17:53:33 +02001730
Kevin Wolf01a56502017-01-18 15:51:56 +01001731 g_free(bs->opaque);
1732 bs->opaque = NULL;
Kevin Wolf01a56502017-01-18 15:51:56 +01001733 return ret;
1734}
1735
Vladimir Sementsov-Ogievskiy621d1732021-09-20 14:55:34 +03001736/*
1737 * Create and open a block node.
1738 *
1739 * @options is a QDict of options to pass to the block drivers, or NULL for an
1740 * empty set of options. The reference to the QDict belongs to the block layer
1741 * after the call (even on failure), so if the caller intends to reuse the
1742 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
1743 */
1744BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
1745 const char *node_name,
1746 QDict *options, int flags,
1747 Error **errp)
Kevin Wolf680c7f92017-01-18 17:16:41 +01001748{
1749 BlockDriverState *bs;
1750 int ret;
1751
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05001752 GLOBAL_STATE_CODE();
1753
Kevin Wolf680c7f92017-01-18 17:16:41 +01001754 bs = bdrv_new();
1755 bs->open_flags = flags;
Vladimir Sementsov-Ogievskiy621d1732021-09-20 14:55:34 +03001756 bs->options = options ?: qdict_new();
1757 bs->explicit_options = qdict_clone_shallow(bs->options);
Kevin Wolf680c7f92017-01-18 17:16:41 +01001758 bs->opaque = NULL;
1759
1760 update_options_from_flags(bs->options, flags);
1761
1762 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1763 if (ret < 0) {
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02001764 qobject_unref(bs->explicit_options);
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001765 bs->explicit_options = NULL;
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02001766 qobject_unref(bs->options);
Manos Pitsidianakis180ca192017-07-14 17:35:48 +03001767 bs->options = NULL;
Kevin Wolf680c7f92017-01-18 17:16:41 +01001768 bdrv_unref(bs);
1769 return NULL;
1770 }
1771
1772 return bs;
1773}
1774
Vladimir Sementsov-Ogievskiy621d1732021-09-20 14:55:34 +03001775/* Create and open a block node. */
1776BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1777 int flags, Error **errp)
1778{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05001779 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy621d1732021-09-20 14:55:34 +03001780 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
1781}
1782
Kevin Wolfc5f30142016-10-06 11:33:17 +02001783QemuOptsList bdrv_runtime_opts = {
Kevin Wolf18edf282015-04-07 17:12:56 +02001784 .name = "bdrv_common",
1785 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1786 .desc = {
1787 {
1788 .name = "node-name",
1789 .type = QEMU_OPT_STRING,
1790 .help = "Node name of the block device node",
1791 },
Kevin Wolf62392eb2015-04-24 16:38:02 +02001792 {
1793 .name = "driver",
1794 .type = QEMU_OPT_STRING,
1795 .help = "Block driver to use for the node",
1796 },
Kevin Wolf91a097e2015-05-08 17:49:53 +02001797 {
Kevin Wolf91a097e2015-05-08 17:49:53 +02001798 .name = BDRV_OPT_CACHE_DIRECT,
1799 .type = QEMU_OPT_BOOL,
1800 .help = "Bypass software writeback cache on the host",
1801 },
1802 {
1803 .name = BDRV_OPT_CACHE_NO_FLUSH,
1804 .type = QEMU_OPT_BOOL,
1805 .help = "Ignore flush requests",
1806 },
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001807 {
Kevin Wolffaecd162025-02-04 22:13:58 +01001808 .name = BDRV_OPT_ACTIVE,
1809 .type = QEMU_OPT_BOOL,
1810 .help = "Node is activated",
1811 },
1812 {
Alberto Garciaf87a0e22016-09-15 17:53:02 +03001813 .name = BDRV_OPT_READ_ONLY,
1814 .type = QEMU_OPT_BOOL,
1815 .help = "Node is opened in read-only mode",
1816 },
Kevin Wolf692e01a2016-09-12 21:00:41 +02001817 {
Kevin Wolfe35bdc12018-10-05 18:57:40 +02001818 .name = BDRV_OPT_AUTO_READ_ONLY,
1819 .type = QEMU_OPT_BOOL,
1820 .help = "Node can become read-only if opening read-write fails",
1821 },
1822 {
Kevin Wolf692e01a2016-09-12 21:00:41 +02001823 .name = "detect-zeroes",
1824 .type = QEMU_OPT_STRING,
1825 .help = "try to optimize zero writes (off, on, unmap)",
1826 },
Kevin Wolf818584a2016-09-12 18:03:18 +02001827 {
Alberto Garcia415bbca2018-10-03 13:23:13 +03001828 .name = BDRV_OPT_DISCARD,
Kevin Wolf818584a2016-09-12 18:03:18 +02001829 .type = QEMU_OPT_STRING,
1830 .help = "discard operation (ignore/off, unmap/on)",
1831 },
Fam Zheng5a9347c2017-05-03 00:35:37 +08001832 {
1833 .name = BDRV_OPT_FORCE_SHARE,
1834 .type = QEMU_OPT_BOOL,
1835 .help = "always accept other writers (default: off)",
1836 },
Kevin Wolf18edf282015-04-07 17:12:56 +02001837 { /* end of list */ }
1838 },
1839};
1840
Maxim Levitsky5a5e7f82020-03-26 03:12:18 +02001841QemuOptsList bdrv_create_opts_simple = {
1842 .name = "simple-create-opts",
1843 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
Max Reitzfd171462020-01-22 17:45:29 +01001844 .desc = {
1845 {
1846 .name = BLOCK_OPT_SIZE,
1847 .type = QEMU_OPT_SIZE,
1848 .help = "Virtual disk size"
1849 },
1850 {
1851 .name = BLOCK_OPT_PREALLOC,
1852 .type = QEMU_OPT_STRING,
1853 .help = "Preallocation mode (allowed values: off)"
1854 },
1855 { /* end of list */ }
1856 }
1857};
1858
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001859/*
Kevin Wolf57915332010-04-14 15:24:50 +02001860 * Common part for opening disk images and files
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001861 *
1862 * Removes all processed options from *options.
Kevin Wolf57915332010-04-14 15:24:50 +02001863 */
Kevin Wolf5696c6e2017-02-17 18:39:24 +01001864static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
Kevin Wolf82dc8b42016-01-11 19:07:50 +01001865 QDict *options, Error **errp)
Kevin Wolf57915332010-04-14 15:24:50 +02001866{
1867 int ret, open_flags;
Kevin Wolf035fccd2013-04-09 14:34:19 +02001868 const char *filename;
Kevin Wolf62392eb2015-04-24 16:38:02 +02001869 const char *driver_name = NULL;
Benoît Canet6913c0c2014-01-23 21:31:33 +01001870 const char *node_name = NULL;
Kevin Wolf818584a2016-09-12 18:03:18 +02001871 const char *discard;
Kevin Wolf18edf282015-04-07 17:12:56 +02001872 QemuOpts *opts;
Kevin Wolf62392eb2015-04-24 16:38:02 +02001873 BlockDriver *drv;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001874 Error *local_err = NULL;
Vladimir Sementsov-Ogievskiy307261b2021-05-27 18:40:54 +03001875 bool ro;
Kevin Wolf57915332010-04-14 15:24:50 +02001876
Kevin Wolf1f051dc2023-10-27 17:53:33 +02001877 GLOBAL_STATE_CODE();
1878
1879 bdrv_graph_rdlock_main_loop();
Paolo Bonzini64058752012-05-08 16:51:49 +02001880 assert(bs->file == NULL);
Kevin Wolf707ff822013-03-06 12:20:31 +01001881 assert(options != NULL && bs->options != options);
Kevin Wolf1f051dc2023-10-27 17:53:33 +02001882 bdrv_graph_rdunlock_main_loop();
Kevin Wolf57915332010-04-14 15:24:50 +02001883
Kevin Wolf62392eb2015-04-24 16:38:02 +02001884 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
Markus Armbrusteraf175e82020-07-07 18:06:03 +02001885 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
Kevin Wolf62392eb2015-04-24 16:38:02 +02001886 ret = -EINVAL;
1887 goto fail_opts;
1888 }
1889
Alberto Garcia9b7e8692016-09-15 17:53:01 +03001890 update_flags_from_options(&bs->open_flags, opts);
1891
Kevin Wolf62392eb2015-04-24 16:38:02 +02001892 driver_name = qemu_opt_get(opts, "driver");
1893 drv = bdrv_find_format(driver_name);
1894 assert(drv != NULL);
1895
Fam Zheng5a9347c2017-05-03 00:35:37 +08001896 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1897
1898 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1899 error_setg(errp,
1900 BDRV_OPT_FORCE_SHARE
1901 "=on can only be used with read-only images");
1902 ret = -EINVAL;
1903 goto fail_opts;
1904 }
1905
Kevin Wolf45673672013-04-22 17:48:40 +02001906 if (file != NULL) {
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02001907 bdrv_graph_rdlock_main_loop();
Max Reitzf30c66b2019-02-01 20:29:05 +01001908 bdrv_refresh_filename(blk_bs(file));
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02001909 bdrv_graph_rdunlock_main_loop();
1910
Kevin Wolf5696c6e2017-02-17 18:39:24 +01001911 filename = blk_bs(file)->filename;
Kevin Wolf45673672013-04-22 17:48:40 +02001912 } else {
Markus Armbruster129c7d12017-03-30 19:43:12 +02001913 /*
1914 * Caution: while qdict_get_try_str() is fine, getting
1915 * non-string types would require more care. When @options
1916 * come from -blockdev or blockdev_add, its members are typed
1917 * according to the QAPI schema, but when they come from
1918 * -drive, they're all QString.
1919 */
Kevin Wolf45673672013-04-22 17:48:40 +02001920 filename = qdict_get_try_str(options, "filename");
1921 }
1922
Max Reitz4a008242017-04-13 18:06:24 +02001923 if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
Kevin Wolf765003d2014-02-03 14:49:42 +01001924 error_setg(errp, "The '%s' block driver requires a file name",
1925 drv->format_name);
Kevin Wolf18edf282015-04-07 17:12:56 +02001926 ret = -EINVAL;
1927 goto fail_opts;
1928 }
1929
Kevin Wolf82dc8b42016-01-11 19:07:50 +01001930 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1931 drv->format_name);
Kevin Wolf62392eb2015-04-24 16:38:02 +02001932
Vladimir Sementsov-Ogievskiy307261b2021-05-27 18:40:54 +03001933 ro = bdrv_is_read_only(bs);
1934
1935 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
1936 if (!ro && bdrv_is_whitelisted(drv, true)) {
Kevin Wolf018f9de2023-09-29 16:51:53 +02001937 bdrv_graph_rdlock_main_loop();
Kevin Wolf8be25de2019-01-22 13:15:31 +01001938 ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
Kevin Wolf018f9de2023-09-29 16:51:53 +02001939 bdrv_graph_rdunlock_main_loop();
Kevin Wolf8be25de2019-01-22 13:15:31 +01001940 } else {
1941 ret = -ENOTSUP;
1942 }
1943 if (ret < 0) {
1944 error_setg(errp,
Vladimir Sementsov-Ogievskiy307261b2021-05-27 18:40:54 +03001945 !ro && bdrv_is_whitelisted(drv, true)
Kevin Wolf8be25de2019-01-22 13:15:31 +01001946 ? "Driver '%s' can only be used for read-only devices"
1947 : "Driver '%s' is not whitelisted",
1948 drv->format_name);
1949 goto fail_opts;
1950 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +08001951 }
Kevin Wolf57915332010-04-14 15:24:50 +02001952
Paolo Bonzinid3faa132017-06-05 14:38:50 +02001953 /* bdrv_new() and bdrv_close() make it so */
Stefan Hajnoczid73415a2020-09-23 11:56:46 +01001954 assert(qatomic_read(&bs->copy_on_read) == 0);
Paolo Bonzinid3faa132017-06-05 14:38:50 +02001955
Kevin Wolf82dc8b42016-01-11 19:07:50 +01001956 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
Vladimir Sementsov-Ogievskiy307261b2021-05-27 18:40:54 +03001957 if (!ro) {
Kevin Wolf0ebd24e2013-09-19 15:12:18 +02001958 bdrv_enable_copy_on_read(bs);
1959 } else {
1960 error_setg(errp, "Can't use copy-on-read on read-only device");
Kevin Wolf18edf282015-04-07 17:12:56 +02001961 ret = -EINVAL;
1962 goto fail_opts;
Kevin Wolf0ebd24e2013-09-19 15:12:18 +02001963 }
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001964 }
1965
Alberto Garcia415bbca2018-10-03 13:23:13 +03001966 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
Kevin Wolf818584a2016-09-12 18:03:18 +02001967 if (discard != NULL) {
1968 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1969 error_setg(errp, "Invalid discard option");
1970 ret = -EINVAL;
1971 goto fail_opts;
1972 }
1973 }
1974
Alberto Garcia543770b2018-09-06 12:37:09 +03001975 bs->detect_zeroes =
1976 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1977 if (local_err) {
1978 error_propagate(errp, local_err);
1979 ret = -EINVAL;
1980 goto fail_opts;
Kevin Wolf692e01a2016-09-12 21:00:41 +02001981 }
1982
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001983 if (filename != NULL) {
1984 pstrcpy(bs->filename, sizeof(bs->filename), filename);
1985 } else {
1986 bs->filename[0] = '\0';
1987 }
Max Reitz91af7012014-07-18 20:24:56 +02001988 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
Kevin Wolf57915332010-04-14 15:24:50 +02001989
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001990 /* Open the image, either directly or using a protocol */
Kevin Wolf82dc8b42016-01-11 19:07:50 +01001991 open_flags = bdrv_open_flags(bs, bs->open_flags);
Kevin Wolf01a56502017-01-18 15:51:56 +01001992 node_name = qemu_opt_get(opts, "node-name");
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001993
Paolo Bonzini41770f62022-11-24 16:21:18 +01001994 assert(!drv->protocol_name || file == NULL);
Kevin Wolf01a56502017-01-18 15:51:56 +01001995 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
Kevin Wolf57915332010-04-14 15:24:50 +02001996 if (ret < 0) {
Kevin Wolf01a56502017-01-18 15:51:56 +01001997 goto fail_opts;
Kevin Wolf57915332010-04-14 15:24:50 +02001998 }
1999
Kevin Wolf18edf282015-04-07 17:12:56 +02002000 qemu_opts_del(opts);
Kevin Wolf57915332010-04-14 15:24:50 +02002001 return 0;
2002
Kevin Wolf18edf282015-04-07 17:12:56 +02002003fail_opts:
2004 qemu_opts_del(opts);
Kevin Wolf57915332010-04-14 15:24:50 +02002005 return ret;
2006}
2007
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002008static QDict *parse_json_filename(const char *filename, Error **errp)
2009{
Zhao Liu7b22e052024-03-11 11:37:56 +08002010 ERRP_GUARD();
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002011 QObject *options_obj;
2012 QDict *options;
2013 int ret;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05002014 GLOBAL_STATE_CODE();
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002015
2016 ret = strstart(filename, "json:", &filename);
2017 assert(ret);
2018
Markus Armbruster5577fff2017-02-28 22:26:59 +01002019 options_obj = qobject_from_json(filename, errp);
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002020 if (!options_obj) {
Markus Armbruster5577fff2017-02-28 22:26:59 +01002021 error_prepend(errp, "Could not parse the JSON options: ");
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002022 return NULL;
2023 }
2024
Max Reitz7dc847e2018-02-24 16:40:29 +01002025 options = qobject_to(QDict, options_obj);
Markus Armbrusterca6b6e12017-02-17 21:38:18 +01002026 if (!options) {
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02002027 qobject_unref(options_obj);
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002028 error_setg(errp, "Invalid JSON object given");
2029 return NULL;
2030 }
2031
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02002032 qdict_flatten(options);
2033
2034 return options;
2035}
2036
Kevin Wolfde3b53f2015-10-29 15:24:41 +01002037static void parse_json_protocol(QDict *options, const char **pfilename,
2038 Error **errp)
2039{
2040 QDict *json_options;
2041 Error *local_err = NULL;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05002042 GLOBAL_STATE_CODE();
Kevin Wolfde3b53f2015-10-29 15:24:41 +01002043
2044 /* Parse json: pseudo-protocol */
2045 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
2046 return;
2047 }
2048
2049 json_options = parse_json_filename(*pfilename, &local_err);
2050 if (local_err) {
2051 error_propagate(errp, local_err);
2052 return;
2053 }
2054
2055 /* Options given in the filename have lower priority than options
2056 * specified directly */
2057 qdict_join(options, json_options, false);
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02002058 qobject_unref(json_options);
Kevin Wolfde3b53f2015-10-29 15:24:41 +01002059 *pfilename = NULL;
2060}
2061
Kevin Wolf57915332010-04-14 15:24:50 +02002062/*
Kevin Wolff54120f2014-05-26 11:09:59 +02002063 * Fills in default options for opening images and converts the legacy
2064 * filename/flags pair to option QDict entries.
Max Reitz53a29512015-03-19 14:53:16 -04002065 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
2066 * block driver has been specified explicitly.
Kevin Wolff54120f2014-05-26 11:09:59 +02002067 */
Kevin Wolfde3b53f2015-10-29 15:24:41 +01002068static int bdrv_fill_options(QDict **options, const char *filename,
Kevin Wolf7ead9462024-04-25 14:56:02 +02002069 int *flags, bool allow_parse_filename,
2070 Error **errp)
Kevin Wolff54120f2014-05-26 11:09:59 +02002071{
2072 const char *drvname;
Max Reitz53a29512015-03-19 14:53:16 -04002073 bool protocol = *flags & BDRV_O_PROTOCOL;
Kevin Wolff54120f2014-05-26 11:09:59 +02002074 bool parse_filename = false;
Max Reitz053e1572015-08-26 19:47:51 +02002075 BlockDriver *drv = NULL;
Kevin Wolff54120f2014-05-26 11:09:59 +02002076 Error *local_err = NULL;
Kevin Wolff54120f2014-05-26 11:09:59 +02002077
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05002078 GLOBAL_STATE_CODE();
2079
Markus Armbruster129c7d12017-03-30 19:43:12 +02002080 /*
2081 * Caution: while qdict_get_try_str() is fine, getting non-string
2082 * types would require more care. When @options come from
2083 * -blockdev or blockdev_add, its members are typed according to
2084 * the QAPI schema, but when they come from -drive, they're all
2085 * QString.
2086 */
Max Reitz53a29512015-03-19 14:53:16 -04002087 drvname = qdict_get_try_str(*options, "driver");
Max Reitz053e1572015-08-26 19:47:51 +02002088 if (drvname) {
2089 drv = bdrv_find_format(drvname);
2090 if (!drv) {
2091 error_setg(errp, "Unknown driver '%s'", drvname);
2092 return -ENOENT;
2093 }
2094 /* If the user has explicitly specified the driver, this choice should
2095 * override the BDRV_O_PROTOCOL flag */
Paolo Bonzini41770f62022-11-24 16:21:18 +01002096 protocol = drv->protocol_name;
Max Reitz53a29512015-03-19 14:53:16 -04002097 }
2098
2099 if (protocol) {
2100 *flags |= BDRV_O_PROTOCOL;
2101 } else {
2102 *flags &= ~BDRV_O_PROTOCOL;
2103 }
2104
Kevin Wolf91a097e2015-05-08 17:49:53 +02002105 /* Translate cache options from flags into options */
2106 update_options_from_flags(*options, *flags);
2107
Kevin Wolff54120f2014-05-26 11:09:59 +02002108 /* Fetch the file name from the options QDict if necessary */
Kevin Wolf17b005f2014-05-27 10:50:29 +02002109 if (protocol && filename) {
Kevin Wolff54120f2014-05-26 11:09:59 +02002110 if (!qdict_haskey(*options, "filename")) {
Eric Blake46f5ac22017-04-27 16:58:17 -05002111 qdict_put_str(*options, "filename", filename);
Kevin Wolf7ead9462024-04-25 14:56:02 +02002112 parse_filename = allow_parse_filename;
Kevin Wolff54120f2014-05-26 11:09:59 +02002113 } else {
2114 error_setg(errp, "Can't specify 'file' and 'filename' options at "
2115 "the same time");
2116 return -EINVAL;
2117 }
2118 }
2119
2120 /* Find the right block driver */
Markus Armbruster129c7d12017-03-30 19:43:12 +02002121 /* See cautionary note on accessing @options above */
Kevin Wolff54120f2014-05-26 11:09:59 +02002122 filename = qdict_get_try_str(*options, "filename");
Kevin Wolff54120f2014-05-26 11:09:59 +02002123
Max Reitz053e1572015-08-26 19:47:51 +02002124 if (!drvname && protocol) {
2125 if (filename) {
2126 drv = bdrv_find_protocol(filename, parse_filename, errp);
2127 if (!drv) {
Kevin Wolff54120f2014-05-26 11:09:59 +02002128 return -EINVAL;
2129 }
Max Reitz053e1572015-08-26 19:47:51 +02002130
2131 drvname = drv->format_name;
Eric Blake46f5ac22017-04-27 16:58:17 -05002132 qdict_put_str(*options, "driver", drvname);
Max Reitz053e1572015-08-26 19:47:51 +02002133 } else {
2134 error_setg(errp, "Must specify either driver or file");
2135 return -EINVAL;
Kevin Wolff54120f2014-05-26 11:09:59 +02002136 }
2137 }
2138
Kevin Wolf17b005f2014-05-27 10:50:29 +02002139 assert(drv || !protocol);
Kevin Wolff54120f2014-05-26 11:09:59 +02002140
2141 /* Driver-specific filename parsing */
Kevin Wolf17b005f2014-05-27 10:50:29 +02002142 if (drv && drv->bdrv_parse_filename && parse_filename) {
Kevin Wolff54120f2014-05-26 11:09:59 +02002143 drv->bdrv_parse_filename(filename, *options, &local_err);
2144 if (local_err) {
2145 error_propagate(errp, local_err);
2146 return -EINVAL;
2147 }
2148
2149 if (!drv->bdrv_needs_filename) {
2150 qdict_del(*options, "filename");
2151 }
2152 }
2153
2154 return 0;
2155}
2156
Kevin Wolf148eb132017-09-14 14:32:04 +02002157typedef struct BlockReopenQueueEntry {
2158 bool prepared;
2159 BDRVReopenState state;
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03002160 QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
Kevin Wolf148eb132017-09-14 14:32:04 +02002161} BlockReopenQueueEntry;
2162
2163/*
2164 * Return the flags that @bs will have after the reopens in @q have
2165 * successfully completed. If @q is NULL (or @bs is not contained in @q),
2166 * return the current flags.
2167 */
2168static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
2169{
2170 BlockReopenQueueEntry *entry;
2171
2172 if (q != NULL) {
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03002173 QTAILQ_FOREACH(entry, q, entry) {
Kevin Wolf148eb132017-09-14 14:32:04 +02002174 if (entry->state.bs == bs) {
2175 return entry->state.flags;
2176 }
2177 }
2178 }
2179
2180 return bs->open_flags;
2181}
2182
2183/* Returns whether the image file can be written to after the reopen queue @q
2184 * has been successfully applied, or right now if @q is NULL. */
Max Reitzcc022142018-06-06 21:37:00 +02002185static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
2186 BlockReopenQueue *q)
Kevin Wolf148eb132017-09-14 14:32:04 +02002187{
2188 int flags = bdrv_reopen_get_flags(q, bs);
2189
2190 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
2191}
2192
Max Reitzcc022142018-06-06 21:37:00 +02002193/*
2194 * Return whether the BDS can be written to. This is not necessarily
2195 * the same as !bdrv_is_read_only(bs), as inactivated images may not
2196 * be written to but do not count as read-only images.
2197 */
2198bool bdrv_is_writable(BlockDriverState *bs)
2199{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05002200 IO_CODE();
Max Reitzcc022142018-06-06 21:37:00 +02002201 return bdrv_is_writable_after_reopen(bs, NULL);
2202}
2203
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002204static char *bdrv_child_user_desc(BdrvChild *c)
2205{
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05002206 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiyda261b62021-06-01 10:52:17 +03002207 return c->klass->get_parent_desc(c);
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002208}
2209
Vladimir Sementsov-Ogievskiy30ebb9a2021-06-01 10:52:18 +03002210/*
2211 * Check that @a allows everything that @b needs. @a and @b must reference same
2212 * child node.
2213 */
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002214static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
2215{
Vladimir Sementsov-Ogievskiy30ebb9a2021-06-01 10:52:18 +03002216 const char *child_bs_name;
2217 g_autofree char *a_user = NULL;
2218 g_autofree char *b_user = NULL;
2219 g_autofree char *perms = NULL;
2220
2221 assert(a->bs);
2222 assert(a->bs == b->bs);
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002223 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002224
2225 if ((b->perm & a->shared_perm) == b->perm) {
2226 return true;
2227 }
2228
Vladimir Sementsov-Ogievskiy30ebb9a2021-06-01 10:52:18 +03002229 child_bs_name = bdrv_get_node_name(b->bs);
2230 a_user = bdrv_child_user_desc(a);
2231 b_user = bdrv_child_user_desc(b);
2232 perms = bdrv_perm_names(b->perm & ~a->shared_perm);
2233
2234 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
2235 "both required by %s (uses node '%s' as '%s' child) and "
2236 "unshared by %s (uses node '%s' as '%s' child).",
2237 child_bs_name, perms,
2238 b_user, child_bs_name, b->name,
2239 a_user, child_bs_name, a->name);
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002240
2241 return false;
2242}
2243
Kevin Wolf3804e3c2023-09-11 11:46:12 +02002244static bool GRAPH_RDLOCK
2245bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002246{
2247 BdrvChild *a, *b;
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002248 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002249
2250 /*
2251 * During the loop we'll look at each pair twice. That's correct because
2252 * bdrv_a_allow_b() is asymmetric and we should check each pair in both
2253 * directions.
2254 */
2255 QLIST_FOREACH(a, &bs->parents, next_parent) {
2256 QLIST_FOREACH(b, &bs->parents, next_parent) {
Vladimir Sementsov-Ogievskiy9397c142021-04-28 18:17:53 +03002257 if (a == b) {
Vladimir Sementsov-Ogievskiy3bf416b2021-04-28 18:17:37 +03002258 continue;
2259 }
2260
2261 if (!bdrv_a_allow_b(a, b, errp)) {
2262 return true;
2263 }
2264 }
2265 }
2266
2267 return false;
2268}
2269
Kevin Wolfc629b6d2023-09-11 11:46:14 +02002270static void GRAPH_RDLOCK
2271bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
2272 BdrvChild *c, BdrvChildRole role,
2273 BlockReopenQueue *reopen_queue,
2274 uint64_t parent_perm, uint64_t parent_shared,
2275 uint64_t *nperm, uint64_t *nshared)
Fam Zhengffd1a5a2017-05-03 00:35:38 +08002276{
Alberto Garcia0b3ca762019-04-04 14:29:53 +03002277 assert(bs->drv && bs->drv->bdrv_child_perm);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05002278 GLOBAL_STATE_CODE();
Max Reitze5d8a402020-05-13 13:05:44 +02002279 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
Alberto Garcia0b3ca762019-04-04 14:29:53 +03002280 parent_perm, parent_shared,
2281 nperm, nshared);
Kevin Wolfe0995dc2017-09-14 12:47:11 +02002282 /* TODO Take force_share from reopen_queue */
Fam Zhengffd1a5a2017-05-03 00:35:38 +08002283 if (child_bs && child_bs->force_share) {
2284 *nshared = BLK_PERM_ALL;
2285 }
2286}
2287
Vladimir Sementsov-Ogievskiybd57f8f2021-04-28 18:17:41 +03002288/*
2289 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
2290 * nodes that are already in the @list, of course) so that final list is
2291 * topologically sorted. Return the result (GSList @list object is updated, so
2292 * don't use old reference after function call).
2293 *
2294 * On function start @list must be already topologically sorted and for any node
2295 * in the @list the whole subtree of the node must be in the @list as well. The
2296 * simplest way to satisfy this criteria: use only result of
2297 * bdrv_topological_dfs() or NULL as @list parameter.
2298 */
Kevin Wolf3804e3c2023-09-11 11:46:12 +02002299static GSList * GRAPH_RDLOCK
2300bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs)
Vladimir Sementsov-Ogievskiybd57f8f2021-04-28 18:17:41 +03002301{
2302 BdrvChild *child;
2303 g_autoptr(GHashTable) local_found = NULL;
2304
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05002305 GLOBAL_STATE_CODE();
2306
Vladimir Sementsov-Ogievskiybd57f8f2021-04-28 18:17:41 +03002307 if (!found) {
2308 assert(!list);
2309 found = local_found = g_hash_table_new(NULL, NULL);
2310 }
2311
2312 if (g_hash_table_contains(found, bs)) {
2313 return list;
2314 }
2315 g_hash_table_add(found, bs);
2316
2317 QLIST_FOREACH(child, &bs->children, next) {
2318 list = bdrv_topological_dfs(list, found, child->bs);
2319 }
2320
2321 return g_slist_prepend(list, bs);
2322}
2323
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002324typedef struct BdrvChildSetPermState {
2325 BdrvChild *child;
2326 uint64_t old_perm;
2327 uint64_t old_shared_perm;
2328} BdrvChildSetPermState;
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002329
2330static void bdrv_child_set_perm_abort(void *opaque)
2331{
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002332 BdrvChildSetPermState *s = opaque;
2333
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002334 GLOBAL_STATE_CODE();
2335
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002336 s->child->perm = s->old_perm;
2337 s->child->shared_perm = s->old_shared_perm;
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002338}
2339
2340static TransactionActionDrv bdrv_child_set_pem_drv = {
2341 .abort = bdrv_child_set_perm_abort,
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002342 .clean = g_free,
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002343};
2344
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002345static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
2346 uint64_t shared, Transaction *tran)
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002347{
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002348 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002349 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002350
2351 *s = (BdrvChildSetPermState) {
2352 .child = c,
2353 .old_perm = c->perm,
2354 .old_shared_perm = c->shared_perm,
2355 };
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002356
2357 c->perm = perm;
2358 c->shared_perm = shared;
2359
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002360 tran_add(tran, &bdrv_child_set_pem_drv, s);
Vladimir Sementsov-Ogievskiyb0defa82021-04-28 18:17:38 +03002361}
2362
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002363static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque)
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002364{
2365 BlockDriverState *bs = opaque;
2366 uint64_t cumulative_perms, cumulative_shared_perms;
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05002367 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002368
2369 if (bs->drv->bdrv_set_perm) {
2370 bdrv_get_cumulative_perm(bs, &cumulative_perms,
2371 &cumulative_shared_perms);
2372 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2373 }
2374}
2375
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002376static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque)
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002377{
2378 BlockDriverState *bs = opaque;
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05002379 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002380
2381 if (bs->drv->bdrv_abort_perm_update) {
2382 bs->drv->bdrv_abort_perm_update(bs);
2383 }
2384}
2385
2386TransactionActionDrv bdrv_drv_set_perm_drv = {
2387 .abort = bdrv_drv_set_perm_abort,
2388 .commit = bdrv_drv_set_perm_commit,
2389};
2390
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002391/*
2392 * After calling this function, the transaction @tran may only be completed
2393 * while holding a reader lock for the graph.
2394 */
2395static int GRAPH_RDLOCK
2396bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
2397 Transaction *tran, Error **errp)
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002398{
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05002399 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002400 if (!bs->drv) {
2401 return 0;
2402 }
2403
2404 if (bs->drv->bdrv_check_perm) {
2405 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
2406 if (ret < 0) {
2407 return ret;
2408 }
2409 }
2410
2411 if (tran) {
2412 tran_add(tran, &bdrv_drv_set_perm_drv, bs);
2413 }
2414
2415 return 0;
2416}
2417
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002418typedef struct BdrvReplaceChildState {
2419 BdrvChild *child;
2420 BlockDriverState *old_bs;
2421} BdrvReplaceChildState;
2422
Kevin Wolf5661a002023-09-11 11:46:10 +02002423static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque)
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002424{
2425 BdrvReplaceChildState *s = opaque;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05002426 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002427
Kevin Wolf5661a002023-09-11 11:46:10 +02002428 bdrv_schedule_unref(s->old_bs);
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002429}
2430
Kevin Wolf5661a002023-09-11 11:46:10 +02002431static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque)
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002432{
2433 BdrvReplaceChildState *s = opaque;
2434 BlockDriverState *new_bs = s->child->bs;
2435
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05002436 GLOBAL_STATE_CODE();
Kevin Wolf5661a002023-09-11 11:46:10 +02002437 assert_bdrv_graph_writable();
Kevin Wolfad29eb32023-09-11 11:46:07 +02002438
Vladimir Sementsov-Ogievskiy0f0b1e22022-07-26 23:11:29 +03002439 /* old_bs reference is transparently moved from @s to @s->child */
Kevin Wolf23987472022-11-18 18:41:09 +01002440 if (!s->child->bs) {
2441 /*
2442 * The parents were undrained when removing old_bs from the child. New
2443 * requests can't have been made, though, because the child was empty.
2444 *
2445 * TODO Make bdrv_replace_child_noperm() transactionable to avoid
2446 * undraining the parent in the first place. Once this is done, having
2447 * new_bs drained when calling bdrv_replace_child_tran() is not a
2448 * requirement any more.
2449 */
Kevin Wolf606ed752022-11-18 18:41:10 +01002450 bdrv_parent_drained_begin_single(s->child);
Kevin Wolf23987472022-11-18 18:41:09 +01002451 assert(!bdrv_parent_drained_poll_single(s->child));
2452 }
2453 assert(s->child->quiesced_parent);
Vladimir Sementsov-Ogievskiy544acc72022-07-26 23:11:31 +03002454 bdrv_replace_child_noperm(s->child, s->old_bs);
Kevin Wolfad29eb32023-09-11 11:46:07 +02002455
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002456 bdrv_unref(new_bs);
2457}
2458
2459static TransactionActionDrv bdrv_replace_child_drv = {
2460 .commit = bdrv_replace_child_commit,
2461 .abort = bdrv_replace_child_abort,
2462 .clean = g_free,
2463};
2464
2465/*
Vladimir Sementsov-Ogievskiy4bf021d2021-06-10 14:25:44 +03002466 * bdrv_replace_child_tran
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002467 *
2468 * Note: real unref of old_bs is done only on commit.
Vladimir Sementsov-Ogievskiy4bf021d2021-06-10 14:25:44 +03002469 *
Kevin Wolf23987472022-11-18 18:41:09 +01002470 * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
2471 * kept drained until the transaction is completed.
2472 *
Kevin Wolf5661a002023-09-11 11:46:10 +02002473 * After calling this function, the transaction @tran may only be completed
2474 * while holding a writer lock for the graph.
2475 *
Vladimir Sementsov-Ogievskiy4bf021d2021-06-10 14:25:44 +03002476 * The function doesn't update permissions, caller is responsible for this.
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002477 */
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02002478static void GRAPH_WRLOCK
2479bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
2480 Transaction *tran)
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002481{
2482 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
Kevin Wolf23987472022-11-18 18:41:09 +01002483
2484 assert(child->quiesced_parent);
2485 assert(!new_bs || new_bs->quiesce_counter);
2486
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002487 *s = (BdrvReplaceChildState) {
Vladimir Sementsov-Ogievskiy0f0b1e22022-07-26 23:11:29 +03002488 .child = child,
2489 .old_bs = child->bs,
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002490 };
2491 tran_add(tran, &bdrv_replace_child_drv, s);
2492
2493 if (new_bs) {
2494 bdrv_ref(new_bs);
2495 }
Kevin Wolfad29eb32023-09-11 11:46:07 +02002496
Vladimir Sementsov-Ogievskiy544acc72022-07-26 23:11:31 +03002497 bdrv_replace_child_noperm(child, new_bs);
Vladimir Sementsov-Ogievskiy0f0b1e22022-07-26 23:11:29 +03002498 /* old_bs reference is transparently moved from @child to @s */
Vladimir Sementsov-Ogievskiy09786232021-04-28 18:17:44 +03002499}
2500
Kevin Wolf33a610c2016-12-15 13:04:20 +01002501/*
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03002502 * Refresh permissions in @bs subtree. The function is intended to be called
2503 * after some graph modification that was done without permission update.
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002504 *
2505 * After calling this function, the transaction @tran may only be completed
2506 * while holding a reader lock for the graph.
Kevin Wolf33a610c2016-12-15 13:04:20 +01002507 */
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002508static int GRAPH_RDLOCK
2509bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
2510 Transaction *tran, Error **errp)
Kevin Wolf33a610c2016-12-15 13:04:20 +01002511{
2512 BlockDriver *drv = bs->drv;
2513 BdrvChild *c;
2514 int ret;
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03002515 uint64_t cumulative_perms, cumulative_shared_perms;
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002516 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03002517
2518 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
Kevin Wolf33a610c2016-12-15 13:04:20 +01002519
2520 /* Write permissions never work with read-only images */
2521 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
Max Reitzcc022142018-06-06 21:37:00 +02002522 !bdrv_is_writable_after_reopen(bs, q))
Kevin Wolf33a610c2016-12-15 13:04:20 +01002523 {
Max Reitz481e0ee2019-05-15 22:15:00 +02002524 if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2525 error_setg(errp, "Block node is read-only");
2526 } else {
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03002527 error_setg(errp, "Read-only block node '%s' cannot support "
2528 "read-write users", bdrv_get_node_name(bs));
Max Reitz481e0ee2019-05-15 22:15:00 +02002529 }
2530
Kevin Wolf33a610c2016-12-15 13:04:20 +01002531 return -EPERM;
2532 }
2533
Kevin Wolf9c60a5d2020-07-16 16:26:00 +02002534 /*
2535 * Unaligned requests will automatically be aligned to bl.request_alignment
2536 * and without RESIZE we can't extend requests to write to space beyond the
2537 * end of the image, so it's required that the image size is aligned.
2538 */
2539 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2540 !(cumulative_perms & BLK_PERM_RESIZE))
2541 {
2542 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2543 error_setg(errp, "Cannot get 'write' permission without 'resize': "
2544 "Image size is not a multiple of request "
2545 "alignment");
2546 return -EPERM;
2547 }
2548 }
2549
Kevin Wolf33a610c2016-12-15 13:04:20 +01002550 /* Check this node */
2551 if (!drv) {
2552 return 0;
2553 }
2554
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002555 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
Vladimir Sementsov-Ogievskiy2513ef52021-04-28 18:17:42 +03002556 errp);
2557 if (ret < 0) {
2558 return ret;
Kevin Wolf33a610c2016-12-15 13:04:20 +01002559 }
2560
Kevin Wolf78e421c2016-12-20 23:25:12 +01002561 /* Drivers that never have children can omit .bdrv_child_perm() */
Kevin Wolf33a610c2016-12-15 13:04:20 +01002562 if (!drv->bdrv_child_perm) {
Kevin Wolf78e421c2016-12-20 23:25:12 +01002563 assert(QLIST_EMPTY(&bs->children));
Kevin Wolf33a610c2016-12-15 13:04:20 +01002564 return 0;
2565 }
2566
2567 /* Check all children */
2568 QLIST_FOREACH(c, &bs->children, next) {
2569 uint64_t cur_perm, cur_shared;
Max Reitz9eab1542019-05-22 19:03:50 +02002570
Max Reitze5d8a402020-05-13 13:05:44 +02002571 bdrv_child_perm(bs, c->bs, c, c->role, q,
Fam Zhengffd1a5a2017-05-03 00:35:38 +08002572 cumulative_perms, cumulative_shared_perms,
2573 &cur_perm, &cur_shared);
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002574 bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002575 }
2576
2577 return 0;
2578}
2579
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002580/*
2581 * @list is a product of bdrv_topological_dfs() (may be called several times) -
2582 * a topologically sorted subgraph.
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002583 *
2584 * After calling this function, the transaction @tran may only be completed
2585 * while holding a reader lock for the graph.
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002586 */
Kevin Wolf3804e3c2023-09-11 11:46:12 +02002587static int GRAPH_RDLOCK
2588bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
2589 Error **errp)
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002590{
2591 int ret;
2592 BlockDriverState *bs;
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002593 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002594
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002595 for ( ; list; list = list->next) {
2596 bs = list->data;
2597
Vladimir Sementsov-Ogievskiy9397c142021-04-28 18:17:53 +03002598 if (bdrv_parent_perms_conflict(bs, errp)) {
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002599 return -EINVAL;
2600 }
2601
Vladimir Sementsov-Ogievskiyc20555e2021-04-28 18:18:04 +03002602 ret = bdrv_node_refresh_perm(bs, q, tran, errp);
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002603 if (ret < 0) {
2604 return ret;
2605 }
Vladimir Sementsov-Ogievskiybd57f8f2021-04-28 18:17:41 +03002606 }
Vladimir Sementsov-Ogievskiy3ef45e02021-04-28 18:17:40 +03002607
Vladimir Sementsov-Ogievskiybd57f8f2021-04-28 18:17:41 +03002608 return 0;
2609}
2610
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002611/*
2612 * @list is any list of nodes. List is completed by all subtrees and
2613 * topologically sorted. It's not a problem if some node occurs in the @list
2614 * several times.
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002615 *
2616 * After calling this function, the transaction @tran may only be completed
2617 * while holding a reader lock for the graph.
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002618 */
Kevin Wolf3804e3c2023-09-11 11:46:12 +02002619static int GRAPH_RDLOCK
2620bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
2621 Error **errp)
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002622{
2623 g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
2624 g_autoptr(GSList) refresh_list = NULL;
2625
2626 for ( ; list; list = list->next) {
2627 refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
2628 }
2629
2630 return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
2631}
2632
Kevin Wolfc7a0f2b2020-03-10 12:38:25 +01002633void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2634 uint64_t *shared_perm)
Kevin Wolf33a610c2016-12-15 13:04:20 +01002635{
2636 BdrvChild *c;
2637 uint64_t cumulative_perms = 0;
2638 uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2639
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05002640 GLOBAL_STATE_CODE();
2641
Kevin Wolf33a610c2016-12-15 13:04:20 +01002642 QLIST_FOREACH(c, &bs->parents, next_parent) {
2643 cumulative_perms |= c->perm;
2644 cumulative_shared_perms &= c->shared_perm;
2645 }
2646
2647 *perm = cumulative_perms;
2648 *shared_perm = cumulative_shared_perms;
2649}
2650
Fam Zheng51761962017-05-03 00:35:36 +08002651char *bdrv_perm_names(uint64_t perm)
Kevin Wolfd0833192017-01-16 18:26:20 +01002652{
2653 struct perm_name {
2654 uint64_t perm;
2655 const char *name;
2656 } permissions[] = {
2657 { BLK_PERM_CONSISTENT_READ, "consistent read" },
2658 { BLK_PERM_WRITE, "write" },
2659 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2660 { BLK_PERM_RESIZE, "resize" },
Kevin Wolfd0833192017-01-16 18:26:20 +01002661 { 0, NULL }
2662 };
2663
Alberto Garciae2a74232020-01-10 18:15:18 +01002664 GString *result = g_string_sized_new(30);
Kevin Wolfd0833192017-01-16 18:26:20 +01002665 struct perm_name *p;
2666
2667 for (p = permissions; p->name; p++) {
2668 if (perm & p->perm) {
Alberto Garciae2a74232020-01-10 18:15:18 +01002669 if (result->len > 0) {
2670 g_string_append(result, ", ");
2671 }
2672 g_string_append(result, p->name);
Kevin Wolfd0833192017-01-16 18:26:20 +01002673 }
2674 }
2675
Alberto Garciae2a74232020-01-10 18:15:18 +01002676 return g_string_free(result, FALSE);
Kevin Wolfd0833192017-01-16 18:26:20 +01002677}
2678
Kevin Wolf33a610c2016-12-15 13:04:20 +01002679
Kevin Wolfbce73bc2023-09-11 11:46:13 +02002680/*
2681 * @tran is allowed to be NULL. In this case no rollback is possible.
2682 *
2683 * After calling this function, the transaction @tran may only be completed
2684 * while holding a reader lock for the graph.
2685 */
Kevin Wolf3804e3c2023-09-11 11:46:12 +02002686static int GRAPH_RDLOCK
2687bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp)
Vladimir Sementsov-Ogievskiybb87e4d2020-11-06 15:42:38 +03002688{
2689 int ret;
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03002690 Transaction *local_tran = NULL;
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002691 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002692 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiybb87e4d2020-11-06 15:42:38 +03002693
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03002694 if (!tran) {
2695 tran = local_tran = tran_new();
2696 }
2697
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03002698 ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03002699
2700 if (local_tran) {
2701 tran_finalize(local_tran, ret);
2702 }
Vladimir Sementsov-Ogievskiybb87e4d2020-11-06 15:42:38 +03002703
Vladimir Sementsov-Ogievskiyb1d2bbe2021-04-28 18:17:43 +03002704 return ret;
Vladimir Sementsov-Ogievskiybb87e4d2020-11-06 15:42:38 +03002705}
2706
Kevin Wolf33a610c2016-12-15 13:04:20 +01002707int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2708 Error **errp)
2709{
Max Reitz10467792019-05-22 19:03:51 +02002710 Error *local_err = NULL;
Vladimir Sementsov-Ogievskiy83928dc2021-04-28 18:17:39 +03002711 Transaction *tran = tran_new();
Kevin Wolf33a610c2016-12-15 13:04:20 +01002712 int ret;
2713
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05002714 GLOBAL_STATE_CODE();
2715
Vladimir Sementsov-Ogievskiyecb776b2021-04-28 18:18:02 +03002716 bdrv_child_set_perm(c, perm, shared, tran);
Vladimir Sementsov-Ogievskiy83928dc2021-04-28 18:17:39 +03002717
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03002718 ret = bdrv_refresh_perms(c->bs, tran, &local_err);
Vladimir Sementsov-Ogievskiy83928dc2021-04-28 18:17:39 +03002719
2720 tran_finalize(tran, ret);
2721
Kevin Wolf33a610c2016-12-15 13:04:20 +01002722 if (ret < 0) {
Vladimir Sementsov-Ogievskiy071b4742020-11-06 15:42:41 +03002723 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2724 /* tighten permissions */
Max Reitz10467792019-05-22 19:03:51 +02002725 error_propagate(errp, local_err);
2726 } else {
2727 /*
2728 * Our caller may intend to only loosen restrictions and
2729 * does not expect this function to fail. Errors are not
2730 * fatal in such a case, so we can just hide them from our
2731 * caller.
2732 */
2733 error_free(local_err);
2734 ret = 0;
2735 }
Kevin Wolf33a610c2016-12-15 13:04:20 +01002736 }
2737
Vladimir Sementsov-Ogievskiy83928dc2021-04-28 18:17:39 +03002738 return ret;
Kevin Wolfd5e6f432016-12-14 17:24:36 +01002739}
2740
Max Reitzc1087f12019-05-22 19:03:46 +02002741int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2742{
2743 uint64_t parent_perms, parent_shared;
2744 uint64_t perms, shared;
2745
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05002746 GLOBAL_STATE_CODE();
2747
Max Reitzc1087f12019-05-22 19:03:46 +02002748 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
Max Reitze5d8a402020-05-13 13:05:44 +02002749 bdrv_child_perm(bs, c->bs, c, c->role, NULL,
Max Reitzbf8e9252020-05-13 13:05:16 +02002750 parent_perms, parent_shared, &perms, &shared);
Max Reitzc1087f12019-05-22 19:03:46 +02002751
2752 return bdrv_child_try_set_perm(c, perms, shared, errp);
2753}
2754
Max Reitz87278af2020-05-13 13:05:40 +02002755/*
2756 * Default implementation for .bdrv_child_perm() for block filters:
2757 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2758 * filtered child.
2759 */
2760static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
Max Reitz87278af2020-05-13 13:05:40 +02002761 BdrvChildRole role,
2762 BlockReopenQueue *reopen_queue,
2763 uint64_t perm, uint64_t shared,
2764 uint64_t *nperm, uint64_t *nshared)
Kevin Wolf6a1b9ee2016-12-15 11:27:32 +01002765{
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002766 GLOBAL_STATE_CODE();
Kevin Wolfe444fa82019-08-02 15:59:41 +02002767 *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2768 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
Kevin Wolf6a1b9ee2016-12-15 11:27:32 +01002769}
2770
Max Reitz70082db2020-05-13 13:05:26 +02002771static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
Max Reitz70082db2020-05-13 13:05:26 +02002772 BdrvChildRole role,
2773 BlockReopenQueue *reopen_queue,
2774 uint64_t perm, uint64_t shared,
2775 uint64_t *nperm, uint64_t *nshared)
2776{
Max Reitze5d8a402020-05-13 13:05:44 +02002777 assert(role & BDRV_CHILD_COW);
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002778 GLOBAL_STATE_CODE();
Max Reitz70082db2020-05-13 13:05:26 +02002779
2780 /*
2781 * We want consistent read from backing files if the parent needs it.
2782 * No other operations are performed on backing files.
2783 */
2784 perm &= BLK_PERM_CONSISTENT_READ;
2785
2786 /*
2787 * If the parent can deal with changing data, we're okay with a
2788 * writable and resizable backing file.
2789 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2790 */
2791 if (shared & BLK_PERM_WRITE) {
2792 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2793 } else {
2794 shared = 0;
2795 }
2796
Vladimir Sementsov-Ogievskiy64631f32021-09-02 12:37:54 +03002797 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
Max Reitz70082db2020-05-13 13:05:26 +02002798
2799 if (bs->open_flags & BDRV_O_INACTIVE) {
2800 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2801 }
2802
2803 *nperm = perm;
2804 *nshared = shared;
2805}
2806
Max Reitz6f838a42020-05-13 13:05:27 +02002807static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
Max Reitz6f838a42020-05-13 13:05:27 +02002808 BdrvChildRole role,
2809 BlockReopenQueue *reopen_queue,
2810 uint64_t perm, uint64_t shared,
2811 uint64_t *nperm, uint64_t *nshared)
2812{
2813 int flags;
2814
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05002815 GLOBAL_STATE_CODE();
Max Reitze5d8a402020-05-13 13:05:44 +02002816 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
Max Reitz6f838a42020-05-13 13:05:27 +02002817
2818 flags = bdrv_reopen_get_flags(reopen_queue, bs);
2819
2820 /*
2821 * Apart from the modifications below, the same permissions are
2822 * forwarded and left alone as for filters
2823 */
Max Reitze5d8a402020-05-13 13:05:44 +02002824 bdrv_filter_default_perms(bs, c, role, reopen_queue,
Max Reitz6f838a42020-05-13 13:05:27 +02002825 perm, shared, &perm, &shared);
2826
Max Reitzf8890542020-05-13 13:05:28 +02002827 if (role & BDRV_CHILD_METADATA) {
2828 /* Format drivers may touch metadata even if the guest doesn't write */
2829 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2830 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2831 }
2832
2833 /*
2834 * bs->file always needs to be consistent because of the
2835 * metadata. We can never allow other users to resize or write
2836 * to it.
2837 */
2838 if (!(flags & BDRV_O_NO_IO)) {
2839 perm |= BLK_PERM_CONSISTENT_READ;
2840 }
2841 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
Max Reitz6f838a42020-05-13 13:05:27 +02002842 }
2843
Max Reitzf8890542020-05-13 13:05:28 +02002844 if (role & BDRV_CHILD_DATA) {
2845 /*
2846 * Technically, everything in this block is a subset of the
2847 * BDRV_CHILD_METADATA path taken above, and so this could
2848 * be an "else if" branch. However, that is not obvious, and
2849 * this function is not performance critical, therefore we let
2850 * this be an independent "if".
2851 */
2852
2853 /*
2854 * We cannot allow other users to resize the file because the
2855 * format driver might have some assumptions about the size
2856 * (e.g. because it is stored in metadata, or because the file
2857 * is split into fixed-size data files).
2858 */
2859 shared &= ~BLK_PERM_RESIZE;
2860
2861 /*
2862 * WRITE_UNCHANGED often cannot be performed as such on the
2863 * data file. For example, the qcow2 driver may still need to
2864 * write copied clusters on copy-on-read.
2865 */
2866 if (perm & BLK_PERM_WRITE_UNCHANGED) {
2867 perm |= BLK_PERM_WRITE;
2868 }
2869
2870 /*
2871 * If the data file is written to, the format driver may
2872 * expect to be able to resize it by writing beyond the EOF.
2873 */
2874 if (perm & BLK_PERM_WRITE) {
2875 perm |= BLK_PERM_RESIZE;
2876 }
Max Reitz6f838a42020-05-13 13:05:27 +02002877 }
Max Reitz6f838a42020-05-13 13:05:27 +02002878
2879 if (bs->open_flags & BDRV_O_INACTIVE) {
2880 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2881 }
2882
2883 *nperm = perm;
2884 *nshared = shared;
2885}
2886
Max Reitz2519f542020-05-13 13:05:29 +02002887void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
Max Reitze5d8a402020-05-13 13:05:44 +02002888 BdrvChildRole role, BlockReopenQueue *reopen_queue,
Max Reitz2519f542020-05-13 13:05:29 +02002889 uint64_t perm, uint64_t shared,
2890 uint64_t *nperm, uint64_t *nshared)
2891{
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05002892 GLOBAL_STATE_CODE();
Max Reitz2519f542020-05-13 13:05:29 +02002893 if (role & BDRV_CHILD_FILTERED) {
2894 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2895 BDRV_CHILD_COW)));
Max Reitze5d8a402020-05-13 13:05:44 +02002896 bdrv_filter_default_perms(bs, c, role, reopen_queue,
Max Reitz2519f542020-05-13 13:05:29 +02002897 perm, shared, nperm, nshared);
2898 } else if (role & BDRV_CHILD_COW) {
2899 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
Max Reitze5d8a402020-05-13 13:05:44 +02002900 bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
Max Reitz2519f542020-05-13 13:05:29 +02002901 perm, shared, nperm, nshared);
2902 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
Max Reitze5d8a402020-05-13 13:05:44 +02002903 bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
Max Reitz2519f542020-05-13 13:05:29 +02002904 perm, shared, nperm, nshared);
2905 } else {
2906 g_assert_not_reached();
2907 }
2908}
2909
Max Reitz7b1d9c42019-11-08 13:34:51 +01002910uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2911{
2912 static const uint64_t permissions[] = {
2913 [BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ,
2914 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE,
2915 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED,
2916 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE,
Max Reitz7b1d9c42019-11-08 13:34:51 +01002917 };
2918
2919 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2920 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2921
2922 assert(qapi_perm < BLOCK_PERMISSION__MAX);
2923
2924 return permissions[qapi_perm];
2925}
2926
Kevin Wolf23987472022-11-18 18:41:09 +01002927/*
2928 * Replaces the node that a BdrvChild points to without updating permissions.
2929 *
2930 * If @new_bs is non-NULL, the parent of @child must already be drained through
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05002931 * @child.
Kevin Wolf23987472022-11-18 18:41:09 +01002932 */
Kevin Wolfad29eb32023-09-11 11:46:07 +02002933static void GRAPH_WRLOCK
2934bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002935{
2936 BlockDriverState *old_bs = child->bs;
Max Reitzdebc2922019-07-22 15:33:44 +02002937 int new_bs_quiesce_counter;
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002938
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02002939 assert(!child->frozen);
Kevin Wolf23987472022-11-18 18:41:09 +01002940
2941 /*
2942 * If we want to change the BdrvChild to point to a drained node as its new
2943 * child->bs, we need to make sure that its new parent is drained, too. In
2944 * other words, either child->quiesce_parent must already be true or we must
2945 * be able to set it and keep the parent's quiesce_counter consistent with
2946 * that, but without polling or starting new requests (this function
2947 * guarantees that it doesn't poll, and starting new requests would be
2948 * against the invariants of drain sections).
2949 *
2950 * To keep things simple, we pick the first option (child->quiesce_parent
2951 * must already be true). We also generalise the rule a bit to make it
2952 * easier to verify in callers and more likely to be covered in test cases:
2953 * The parent must be quiesced through this child even if new_bs isn't
2954 * currently drained.
2955 *
2956 * The only exception is for callers that always pass new_bs == NULL. In
2957 * this case, we obviously never need to consider the case of a drained
2958 * new_bs, so we can keep the callers simpler by allowing them not to drain
2959 * the parent.
2960 */
2961 assert(!new_bs || child->quiesced_parent);
Kevin Wolfbfb8aa62021-10-18 15:47:14 +02002962 assert(old_bs != new_bs);
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05002963 GLOBAL_STATE_CODE();
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02002964
Fam Zhengbb2614e2017-04-07 14:54:10 +08002965 if (old_bs && new_bs) {
2966 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
2967 }
Max Reitzdebc2922019-07-22 15:33:44 +02002968
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002969 if (old_bs) {
Max Reitzbd86fb92020-05-13 13:05:13 +02002970 if (child->klass->detach) {
2971 child->klass->detach(child);
Kevin Wolfd736f112017-12-18 16:05:48 +01002972 }
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002973 QLIST_REMOVE(child, next_parent);
2974 }
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002975
2976 child->bs = new_bs;
Kevin Wolf36fe1332016-05-17 14:51:55 +02002977
2978 if (new_bs) {
2979 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
Max Reitzbd86fb92020-05-13 13:05:13 +02002980 if (child->klass->attach) {
2981 child->klass->attach(child);
Kevin Wolfdb95dbb2017-02-08 11:28:52 +01002982 }
Kevin Wolf36fe1332016-05-17 14:51:55 +02002983 }
Max Reitzdebc2922019-07-22 15:33:44 +02002984
2985 /*
Kevin Wolf23987472022-11-18 18:41:09 +01002986 * If the parent was drained through this BdrvChild previously, but new_bs
2987 * is not drained, allow requests to come in only after the new node has
2988 * been attached.
Max Reitzdebc2922019-07-22 15:33:44 +02002989 */
Kevin Wolf57e05be2022-11-18 18:41:06 +01002990 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
2991 if (!new_bs_quiesce_counter && child->quiesced_parent) {
Max Reitzdebc2922019-07-22 15:33:44 +02002992 bdrv_parent_drained_end_single(child);
Max Reitzdebc2922019-07-22 15:33:44 +02002993 }
Kevin Wolfe9740bc2016-05-23 15:52:26 +02002994}
2995
/**
 * Free the given @child.
 *
 * Releases both the name string owned by the child and the BdrvChild
 * structure itself.
 *
 * The child must be empty (i.e. `child->bs == NULL`) and it must be
 * unused (i.e. not in a children list).
 */
static void bdrv_child_free(BdrvChild *child)
{
    /* Must already be detached from any node */
    assert(!child->bs);
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    assert(!child->next.le_prev); /* not in children list */

    g_free(child->name);
    g_free(child);
}
3013
/*
 * State that bdrv_attach_child_common() records for its transaction action,
 * consumed by bdrv_attach_child_common_abort() to undo the attach.
 */
typedef struct BdrvAttachChildCommonState {
    /* The BdrvChild that was created and attached */
    BdrvChild *child;
    /* Parent's AioContext as queried before any AioContext change */
    AioContext *old_parent_ctx;
    /* AioContext of the child node on entry to the attach function */
    AioContext *old_child_ctx;
} BdrvAttachChildCommonState;
3019
Kevin Wolf5661a002023-09-11 11:46:10 +02003020static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003021{
3022 BdrvAttachChildCommonState *s = opaque;
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003023 BlockDriverState *bs = s->child->bs;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003024
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05003025 GLOBAL_STATE_CODE();
Kevin Wolf5661a002023-09-11 11:46:10 +02003026 assert_bdrv_graph_writable();
Kevin Wolfad29eb32023-09-11 11:46:07 +02003027
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003028 bdrv_replace_child_noperm(s->child, NULL);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003029
3030 if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02003031 bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL,
3032 &error_abort);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003033 }
3034
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003035 if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) {
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003036 Transaction *tran;
3037 GHashTable *visited;
3038 bool ret;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003039
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003040 tran = tran_new();
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003041
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003042 /* No need to visit `child`, because it has been detached already */
3043 visited = g_hash_table_new(NULL, NULL);
3044 ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx,
3045 visited, tran, &error_abort);
3046 g_hash_table_destroy(visited);
3047
3048 /* transaction is supposed to always succeed */
3049 assert(ret == true);
3050 tran_commit(tran);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003051 }
3052
Kevin Wolf5661a002023-09-11 11:46:10 +02003053 bdrv_schedule_unref(bs);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003054 bdrv_child_free(s->child);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003055}
3056
/*
 * Transaction action added by bdrv_attach_child_common().  Aborting undoes
 * the attach; the BdrvAttachChildCommonState is released with g_free() by
 * the .clean hook (presumably once the transaction has completed either
 * way -- confirm against the transactions API).
 */
static TransactionActionDrv bdrv_attach_child_common_drv = {
    .abort = bdrv_attach_child_common_abort,
    .clean = g_free,
};
3061
3062/*
3063 * Common part of attaching bdrv child to bs or to blk or to job
Vladimir Sementsov-Ogievskiyf8d2ad72021-06-01 10:52:13 +03003064 *
Vladimir Sementsov-Ogievskiy7ec390d2021-06-10 14:25:45 +03003065 * Function doesn't update permissions, caller is responsible for this.
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003066 *
Kevin Wolf5661a002023-09-11 11:46:10 +02003067 * After calling this function, the transaction @tran may only be completed
3068 * while holding a writer lock for the graph.
3069 *
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003070 * Returns new created child.
Kevin Wolfc066e802023-06-05 10:57:05 +02003071 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003072 * Both @parent_bs and @child_bs can move to a different AioContext in this
3073 * function.
Fiona Ebner2b833592025-05-30 17:10:47 +02003074 *
3075 * All block nodes must be drained before this function is called until after
3076 * the transaction is finalized.
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003077 */
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003078static BdrvChild * GRAPH_WRLOCK
3079bdrv_attach_child_common(BlockDriverState *child_bs,
3080 const char *child_name,
3081 const BdrvChildClass *child_class,
3082 BdrvChildRole child_role,
3083 uint64_t perm, uint64_t shared_perm,
3084 void *opaque,
3085 Transaction *tran, Error **errp)
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003086{
3087 BdrvChild *new_child;
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05003088 AioContext *parent_ctx;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003089 AioContext *child_ctx = bdrv_get_aio_context(child_bs);
3090
Vladimir Sementsov-Ogievskiyda261b62021-06-01 10:52:17 +03003091 assert(child_class->get_parent_desc);
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05003092 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003093
Kevin Wolf8c2c72a2025-02-04 22:13:57 +01003094 if (bdrv_is_inactive(child_bs) && (perm & ~BLK_PERM_CONSISTENT_READ)) {
3095 g_autofree char *perm_names = bdrv_perm_names(perm);
3096 error_setg(errp, "Permission '%s' unavailable on inactive node",
3097 perm_names);
3098 return NULL;
3099 }
3100
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003101 new_child = g_new(BdrvChild, 1);
3102 *new_child = (BdrvChild) {
3103 .bs = NULL,
3104 .name = g_strdup(child_name),
3105 .klass = child_class,
3106 .role = child_role,
3107 .perm = perm,
3108 .shared_perm = shared_perm,
3109 .opaque = opaque,
3110 };
3111
3112 /*
3113 * If the AioContexts don't match, first try to move the subtree of
3114 * child_bs into the AioContext of the new parent. If this doesn't work,
3115 * try moving the parent into the AioContext of child_bs instead.
3116 */
3117 parent_ctx = bdrv_child_get_parent_aio_context(new_child);
3118 if (child_ctx != parent_ctx) {
3119 Error *local_err = NULL;
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02003120 int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL,
3121 &local_err);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003122
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003123 if (ret < 0 && child_class->change_aio_ctx) {
Markus Armbrusterfb2575f2023-09-21 14:13:11 +02003124 Transaction *aio_ctx_tran = tran_new();
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003125 GHashTable *visited = g_hash_table_new(NULL, NULL);
3126 bool ret_child;
3127
3128 g_hash_table_add(visited, new_child);
3129 ret_child = child_class->change_aio_ctx(new_child, child_ctx,
Markus Armbrusterfb2575f2023-09-21 14:13:11 +02003130 visited, aio_ctx_tran,
3131 NULL);
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003132 if (ret_child == true) {
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003133 error_free(local_err);
3134 ret = 0;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003135 }
Markus Armbrusterfb2575f2023-09-21 14:13:11 +02003136 tran_finalize(aio_ctx_tran, ret_child == true ? 0 : -1);
Emanuele Giuseppe Espositof8be48a2022-10-25 04:49:49 -04003137 g_hash_table_destroy(visited);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003138 }
3139
3140 if (ret < 0) {
3141 error_propagate(errp, local_err);
Hanna Reitz04c9c3a2021-11-15 15:53:59 +01003142 bdrv_child_free(new_child);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003143 return NULL;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003144 }
3145 }
3146
3147 bdrv_ref(child_bs);
Kevin Wolf23987472022-11-18 18:41:09 +01003148 /*
3149 * Let every new BdrvChild start with a drained parent. Inserting the child
3150 * in the graph with bdrv_replace_child_noperm() will undrain it if
3151 * @child_bs is not drained.
3152 *
3153 * The child was only just created and is not yet visible in global state
3154 * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
3155 * could have sent requests and polling is not necessary.
3156 *
3157 * Note that this means that the parent isn't fully drained yet, we only
3158 * stop new requests from coming in. This is fine, we don't care about the
3159 * old requests here, they are not for this child. If another place enters a
3160 * drain section for the same parent, but wants it to be fully quiesced, it
Michael Tokarev8c3edfa2025-05-07 20:03:13 +03003161 * will not run most of the code in .drained_begin() again (which is not
Kevin Wolf23987472022-11-18 18:41:09 +01003162 * a problem, we already did this), but it will still poll until the parent
3163 * is fully quiesced, so it will not be negatively affected either.
3164 */
Kevin Wolf606ed752022-11-18 18:41:10 +01003165 bdrv_parent_drained_begin_single(new_child);
Vladimir Sementsov-Ogievskiy544acc72022-07-26 23:11:31 +03003166 bdrv_replace_child_noperm(new_child, child_bs);
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003167
3168 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
3169 *s = (BdrvAttachChildCommonState) {
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003170 .child = new_child,
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003171 .old_parent_ctx = parent_ctx,
3172 .old_child_ctx = child_ctx,
3173 };
3174 tran_add(tran, &bdrv_attach_child_common_drv, s);
3175
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003176 return new_child;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003177}
3178
Vladimir Sementsov-Ogievskiyf8d2ad72021-06-01 10:52:13 +03003179/*
Vladimir Sementsov-Ogievskiy7ec390d2021-06-10 14:25:45 +03003180 * Function doesn't update permissions, caller is responsible for this.
Kevin Wolfc066e802023-06-05 10:57:05 +02003181 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003182 * Both @parent_bs and @child_bs can move to a different AioContext in this
3183 * function.
Kevin Wolf5661a002023-09-11 11:46:10 +02003184 *
3185 * After calling this function, the transaction @tran may only be completed
3186 * while holding a writer lock for the graph.
Fiona Ebner2b833592025-05-30 17:10:47 +02003187 *
3188 * All block nodes must be drained before this function is called until after
3189 * the transaction is finalized.
Vladimir Sementsov-Ogievskiyf8d2ad72021-06-01 10:52:13 +03003190 */
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003191static BdrvChild * GRAPH_WRLOCK
3192bdrv_attach_child_noperm(BlockDriverState *parent_bs,
3193 BlockDriverState *child_bs,
3194 const char *child_name,
3195 const BdrvChildClass *child_class,
3196 BdrvChildRole child_role,
3197 Transaction *tran,
3198 Error **errp)
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003199{
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003200 uint64_t perm, shared_perm;
3201
3202 assert(parent_bs->drv);
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05003203 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003204
Kevin Wolfbfb8aa62021-10-18 15:47:14 +02003205 if (bdrv_recurse_has_child(child_bs, parent_bs)) {
3206 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
3207 child_bs->node_name, child_name, parent_bs->node_name);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003208 return NULL;
Kevin Wolfbfb8aa62021-10-18 15:47:14 +02003209 }
Kevin Wolf9b813612025-02-04 22:13:56 +01003210 if (bdrv_is_inactive(child_bs) && !bdrv_is_inactive(parent_bs)) {
3211 error_setg(errp, "Inactive '%s' can't be a %s child of active '%s'",
3212 child_bs->node_name, child_name, parent_bs->node_name);
3213 return NULL;
3214 }
Kevin Wolfbfb8aa62021-10-18 15:47:14 +02003215
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003216 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
3217 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
3218 perm, shared_perm, &perm, &shared_perm);
3219
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003220 return bdrv_attach_child_common(child_bs, child_name, child_class,
3221 child_role, perm, shared_perm, parent_bs,
3222 tran, errp);
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003223}
3224
Alberto Garciab441dc72019-05-13 16:46:18 +03003225/*
3226 * This function steals the reference to child_bs from the caller.
3227 * That reference is later dropped by bdrv_root_unref_child().
3228 *
3229 * On failure NULL is returned, errp is set and the reference to
3230 * child_bs is also dropped.
Fiona Ebnerffdcd082025-05-30 17:10:49 +02003231 *
3232 * All block nodes must be drained.
Alberto Garciab441dc72019-05-13 16:46:18 +03003233 */
Kevin Wolff21d96d2016-03-08 13:47:46 +01003234BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
3235 const char *child_name,
Max Reitzbd86fb92020-05-13 13:05:13 +02003236 const BdrvChildClass *child_class,
Max Reitz258b7762020-05-13 13:05:15 +02003237 BdrvChildRole child_role,
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003238 uint64_t perm, uint64_t shared_perm,
3239 void *opaque, Error **errp)
Kevin Wolfdf581792015-06-15 11:53:47 +02003240{
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003241 int ret;
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003242 BdrvChild *child;
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003243 Transaction *tran = tran_new();
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003244
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05003245 GLOBAL_STATE_CODE();
3246
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003247 child = bdrv_attach_child_common(child_bs, child_name, child_class,
Vladimir Sementsov-Ogievskiy548a74c2021-04-28 18:17:46 +03003248 child_role, perm, shared_perm, opaque,
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003249 tran, errp);
3250 if (!child) {
3251 ret = -EINVAL;
Kevin Wolfe878bb12021-05-03 13:05:54 +02003252 goto out;
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003253 }
3254
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03003255 ret = bdrv_refresh_perms(child_bs, tran, errp);
Kevin Wolfdf581792015-06-15 11:53:47 +02003256
Kevin Wolfe878bb12021-05-03 13:05:54 +02003257out:
3258 tran_finalize(tran, ret);
Vladimir Sementsov-Ogievskiyf8d2ad72021-06-01 10:52:13 +03003259
Kevin Wolf03b9eac2023-10-27 17:53:13 +02003260 bdrv_schedule_unref(child_bs);
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003261
3262 return ret < 0 ? NULL : child;
Kevin Wolfdf581792015-06-15 11:53:47 +02003263}
3264
Alberto Garciab441dc72019-05-13 16:46:18 +03003265/*
3266 * This function transfers the reference to child_bs from the caller
3267 * to parent_bs. That reference is later dropped by parent_bs on
3268 * bdrv_close() or if someone calls bdrv_unref_child().
3269 *
3270 * On failure NULL is returned, errp is set and the reference to
3271 * child_bs is also dropped.
3272 */
Wen Congyang98292c62016-05-10 15:36:38 +08003273BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
3274 BlockDriverState *child_bs,
3275 const char *child_name,
Max Reitzbd86fb92020-05-13 13:05:13 +02003276 const BdrvChildClass *child_class,
Max Reitz258b7762020-05-13 13:05:15 +02003277 BdrvChildRole child_role,
Kevin Wolf8b2ff522016-12-20 22:21:17 +01003278 Error **errp)
Kevin Wolff21d96d2016-03-08 13:47:46 +01003279{
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003280 int ret;
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003281 BdrvChild *child;
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003282 Transaction *tran = tran_new();
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003283
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003284 GLOBAL_STATE_CODE();
3285
Fiona Ebner2b833592025-05-30 17:10:47 +02003286 bdrv_drain_all_begin();
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003287 child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name,
3288 child_class, child_role, tran, errp);
3289 if (!child) {
3290 ret = -EINVAL;
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003291 goto out;
Kevin Wolfd5e6f432016-12-14 17:24:36 +01003292 }
3293
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03003294 ret = bdrv_refresh_perms(parent_bs, tran, errp);
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003295 if (ret < 0) {
3296 goto out;
3297 }
3298
3299out:
3300 tran_finalize(tran, ret);
Fiona Ebner2b833592025-05-30 17:10:47 +02003301 bdrv_drain_all_end();
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003302
Kevin Wolfafdaeb92023-09-11 11:46:11 +02003303 bdrv_schedule_unref(child_bs);
Vladimir Sementsov-Ogievskiyaa5a04c2021-04-28 18:17:47 +03003304
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003305 return ret < 0 ? NULL : child;
Kevin Wolff21d96d2016-03-08 13:47:46 +01003306}
3307
Max Reitz7b99a262019-06-12 16:07:11 +02003308/* Callers must ensure that child->frozen is false. */
Kevin Wolff21d96d2016-03-08 13:47:46 +01003309void bdrv_root_unref_child(BdrvChild *child)
Kevin Wolf33a60402015-06-15 13:51:04 +02003310{
Vladimir Sementsov-Ogievskiy00eb93b2022-11-07 19:35:55 +03003311 BlockDriverState *child_bs = child->bs;
Kevin Wolf779020c2015-10-13 14:09:44 +02003312
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003313 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy00eb93b2022-11-07 19:35:55 +03003314 bdrv_replace_child_noperm(child, NULL);
3315 bdrv_child_free(child);
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003316
Vladimir Sementsov-Ogievskiy00eb93b2022-11-07 19:35:55 +03003317 if (child_bs) {
3318 /*
3319 * Update permissions for old node. We're just taking a parent away, so
3320 * we're loosening restrictions. Errors of permission update are not
3321 * fatal in this case, ignore them.
3322 */
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03003323 bdrv_refresh_perms(child_bs, NULL, NULL);
Vladimir Sementsov-Ogievskiy00eb93b2022-11-07 19:35:55 +03003324
3325 /*
3326 * When the parent requiring a non-default AioContext is removed, the
3327 * node moves back to the main AioContext
3328 */
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02003329 bdrv_drain_all_begin();
3330 bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(),
3331 NULL, NULL);
3332 bdrv_drain_all_end();
Vladimir Sementsov-Ogievskiy00eb93b2022-11-07 19:35:55 +03003333 }
3334
Kevin Wolfede01e42023-09-11 11:46:18 +02003335 bdrv_schedule_unref(child_bs);
Kevin Wolff21d96d2016-03-08 13:47:46 +01003336}
3337
/*
 * Undo record created by bdrv_set_inherits_from() when it is given a
 * transaction.
 */
typedef struct BdrvSetInheritsFrom {
    /* Node whose inherits_from pointer was changed */
    BlockDriverState *bs;
    /* Value of bs->inherits_from before the change */
    BlockDriverState *old_inherits_from;
} BdrvSetInheritsFrom;
3342
3343static void bdrv_set_inherits_from_abort(void *opaque)
3344{
3345 BdrvSetInheritsFrom *s = opaque;
3346
3347 s->bs->inherits_from = s->old_inherits_from;
3348}
3349
/*
 * Transaction action added by bdrv_set_inherits_from().  Aborting restores
 * the previous inherits_from pointer; the BdrvSetInheritsFrom record is
 * released with g_free() by the .clean hook.
 */
static TransactionActionDrv bdrv_set_inherits_from_drv = {
    .abort = bdrv_set_inherits_from_abort,
    .clean = g_free,
};
3354
3355/* @tran is allowed to be NULL. In this case no rollback is possible */
3356static void bdrv_set_inherits_from(BlockDriverState *bs,
3357 BlockDriverState *new_inherits_from,
3358 Transaction *tran)
3359{
3360 if (tran) {
3361 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
3362
3363 *s = (BdrvSetInheritsFrom) {
3364 .bs = bs,
3365 .old_inherits_from = bs->inherits_from,
3366 };
3367
3368 tran_add(tran, &bdrv_set_inherits_from_drv, s);
3369 }
3370
3371 bs->inherits_from = new_inherits_from;
3372}
3373
Max Reitz3cf746b2019-07-03 19:28:07 +02003374/**
3375 * Clear all inherits_from pointers from children and grandchildren of
3376 * @root that point to @root, where necessary.
Vladimir Sementsov-Ogievskiy332b3a12021-04-28 18:17:54 +03003377 * @tran is allowed to be NULL. In this case no rollback is possible
Max Reitz3cf746b2019-07-03 19:28:07 +02003378 */
Kevin Wolf32a8aba2023-09-11 11:46:19 +02003379static void GRAPH_WRLOCK
3380bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
3381 Transaction *tran)
Kevin Wolff21d96d2016-03-08 13:47:46 +01003382{
Max Reitz3cf746b2019-07-03 19:28:07 +02003383 BdrvChild *c;
Kevin Wolf33a60402015-06-15 13:51:04 +02003384
Max Reitz3cf746b2019-07-03 19:28:07 +02003385 if (child->bs->inherits_from == root) {
3386 /*
3387 * Remove inherits_from only when the last reference between root and
3388 * child->bs goes away.
3389 */
3390 QLIST_FOREACH(c, &root->children, next) {
Kevin Wolf4e4bf5c2016-12-16 18:52:37 +01003391 if (c != child && c->bs == child->bs) {
3392 break;
3393 }
3394 }
3395 if (c == NULL) {
Vladimir Sementsov-Ogievskiy332b3a12021-04-28 18:17:54 +03003396 bdrv_set_inherits_from(child->bs, NULL, tran);
Kevin Wolf4e4bf5c2016-12-16 18:52:37 +01003397 }
Kevin Wolf33a60402015-06-15 13:51:04 +02003398 }
3399
Max Reitz3cf746b2019-07-03 19:28:07 +02003400 QLIST_FOREACH(c, &child->bs->children, next) {
Vladimir Sementsov-Ogievskiy332b3a12021-04-28 18:17:54 +03003401 bdrv_unset_inherits_from(root, c, tran);
Max Reitz3cf746b2019-07-03 19:28:07 +02003402 }
3403}
3404
Max Reitz7b99a262019-06-12 16:07:11 +02003405/* Callers must ensure that child->frozen is false. */
Max Reitz3cf746b2019-07-03 19:28:07 +02003406void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
3407{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003408 GLOBAL_STATE_CODE();
Max Reitz3cf746b2019-07-03 19:28:07 +02003409 if (child == NULL) {
3410 return;
3411 }
3412
Vladimir Sementsov-Ogievskiy332b3a12021-04-28 18:17:54 +03003413 bdrv_unset_inherits_from(parent, child, NULL);
Kevin Wolff21d96d2016-03-08 13:47:46 +01003414 bdrv_root_unref_child(child);
Kevin Wolf33a60402015-06-15 13:51:04 +02003415}
3416
Kevin Wolf5c8cab42016-02-24 15:13:35 +01003417
Kevin Wolf356f4ef2023-09-11 11:46:15 +02003418static void GRAPH_RDLOCK
3419bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
Kevin Wolf5c8cab42016-02-24 15:13:35 +01003420{
3421 BdrvChild *c;
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05003422 GLOBAL_STATE_CODE();
Kevin Wolf5c8cab42016-02-24 15:13:35 +01003423 QLIST_FOREACH(c, &bs->parents, next_parent) {
Max Reitzbd86fb92020-05-13 13:05:13 +02003424 if (c->klass->change_media) {
3425 c->klass->change_media(c, load);
Kevin Wolf5c8cab42016-02-24 15:13:35 +01003426 }
3427 }
3428}
3429
Alberto Garcia0065c452018-10-31 18:16:37 +02003430/* Return true if you can reach parent going through child->inherits_from
3431 * recursively. If parent or child are NULL, return false */
3432static bool bdrv_inherits_from_recursive(BlockDriverState *child,
3433 BlockDriverState *parent)
3434{
3435 while (child && child != parent) {
3436 child = child->inherits_from;
3437 }
3438
3439 return child != NULL;
3440}
3441
Kevin Wolf5db15a52015-09-14 15:33:33 +02003442/*
Max Reitz25191e52020-05-13 13:05:33 +02003443 * Return the BdrvChildRole for @bs's backing child. bs->backing is
3444 * mostly used for COW backing children (role = COW), but also for
3445 * filtered children (role = FILTERED | PRIMARY).
3446 */
3447static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
3448{
3449 if (bs->drv && bs->drv->is_filter) {
3450 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3451 } else {
3452 return BDRV_CHILD_COW;
3453 }
3454}
3455
3456/*
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003457 * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
3458 * callers which don't need their own reference any more must call bdrv_unref().
Vladimir Sementsov-Ogievskiy7ec390d2021-06-10 14:25:45 +03003459 *
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003460 * If the respective child is already present (i.e. we're detaching a node),
3461 * that child node must be drained.
3462 *
Vladimir Sementsov-Ogievskiy7ec390d2021-06-10 14:25:45 +03003463 * Function doesn't update permissions, caller is responsible for this.
Kevin Wolf4b408662023-06-05 10:57:06 +02003464 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003465 * Both @parent_bs and @child_bs can move to a different AioContext in this
3466 * function.
Kevin Wolf5661a002023-09-11 11:46:10 +02003467 *
3468 * After calling this function, the transaction @tran may only be completed
3469 * while holding a writer lock for the graph.
Fiona Ebner2b833592025-05-30 17:10:47 +02003470 *
3471 * All block nodes must be drained before this function is called until after
3472 * the transaction is finalized.
Kevin Wolf5db15a52015-09-14 15:33:33 +02003473 */
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003474static int GRAPH_WRLOCK
3475bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
3476 BlockDriverState *child_bs,
3477 bool is_backing,
3478 Transaction *tran, Error **errp)
Fam Zheng8d24cce2014-05-23 21:29:45 +08003479{
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003480 bool update_inherits_from =
3481 bdrv_inherits_from_recursive(child_bs, parent_bs);
3482 BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
3483 BdrvChildRole role;
Alberto Garcia0065c452018-10-31 18:16:37 +02003484
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05003485 GLOBAL_STATE_CODE();
3486
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003487 if (!parent_bs->drv) {
3488 /*
3489 * Node without drv is an object without a class :/. TODO: finally fix
3490 * qcow2 driver to never clear bs->drv and implement format corruption
3491 * handling in other way.
3492 */
3493 error_setg(errp, "Node corrupted");
3494 return -EINVAL;
3495 }
3496
3497 if (child && child->frozen) {
3498 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
3499 child->name, parent_bs->node_name, child->bs->node_name);
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03003500 return -EPERM;
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02003501 }
3502
Vladimir Sementsov-Ogievskiy25f78d92021-06-10 15:05:34 +03003503 if (is_backing && !parent_bs->drv->is_filter &&
3504 !parent_bs->drv->supports_backing)
3505 {
3506 error_setg(errp, "Driver '%s' of node '%s' does not support backing "
3507 "files", parent_bs->drv->format_name, parent_bs->node_name);
3508 return -EINVAL;
3509 }
3510
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003511 if (parent_bs->drv->is_filter) {
3512 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3513 } else if (is_backing) {
3514 role = BDRV_CHILD_COW;
3515 } else {
3516 /*
3517 * We only can use same role as it is in existing child. We don't have
3518 * infrastructure to determine role of file child in generic way
3519 */
3520 if (!child) {
3521 error_setg(errp, "Cannot set file child to format node without "
3522 "file child");
3523 return -EINVAL;
3524 }
3525 role = child->role;
Fam Zheng826b6ca2014-05-23 21:29:47 +08003526 }
3527
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003528 if (child) {
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003529 assert(child->bs->quiesce_counter);
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003530 bdrv_unset_inherits_from(parent_bs, child, tran);
Vladimir Sementsov-Ogievskiy57f08942022-07-26 23:11:34 +03003531 bdrv_remove_child(child, tran);
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003532 }
3533
3534 if (!child_bs) {
Fam Zheng8d24cce2014-05-23 21:29:45 +08003535 goto out;
3536 }
Kevin Wolf12fa4af2017-02-17 20:42:32 +01003537
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003538 child = bdrv_attach_child_noperm(parent_bs, child_bs,
3539 is_backing ? "backing" : "file",
3540 &child_of_bds, role,
3541 tran, errp);
3542 if (!child) {
3543 return -EINVAL;
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03003544 }
3545
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003546
3547 /*
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003548 * If inherits_from pointed recursively to bs then let's update it to
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003549 * point directly to bs (else it will become NULL).
3550 */
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03003551 if (update_inherits_from) {
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003552 bdrv_set_inherits_from(child_bs, parent_bs, tran);
Alberto Garcia0065c452018-10-31 18:16:37 +02003553 }
Fam Zheng826b6ca2014-05-23 21:29:47 +08003554
Fam Zheng8d24cce2014-05-23 21:29:45 +08003555out:
Vladimir Sementsov-Ogievskiye9238272021-06-10 15:05:30 +03003556 bdrv_refresh_limits(parent_bs, tran, NULL);
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003557
3558 return 0;
3559}
3560
Kevin Wolf4b408662023-06-05 10:57:06 +02003561/*
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003562 * Both @bs and @backing_hd can move to a different AioContext in this
3563 * function.
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003564 *
Fiona Ebnere66dbda2025-05-30 17:10:48 +02003565 * All block nodes must be drained.
Kevin Wolf4b408662023-06-05 10:57:06 +02003566 */
Kevin Wolf92140b92022-11-18 18:41:04 +01003567int bdrv_set_backing_hd_drained(BlockDriverState *bs,
3568 BlockDriverState *backing_hd,
3569 Error **errp)
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003570{
3571 int ret;
3572 Transaction *tran = tran_new();
3573
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003574 GLOBAL_STATE_CODE();
Kevin Wolf92140b92022-11-18 18:41:04 +01003575 assert(bs->quiesce_counter > 0);
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02003576 if (bs->backing) {
3577 assert(bs->backing->bs->quiesce_counter > 0);
3578 }
Vladimir Sementsov-Ogievskiyc0829cb2022-01-24 18:37:41 +01003579
Kevin Wolf3204c2e2023-10-27 17:53:23 +02003580 ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003581 if (ret < 0) {
3582 goto out;
3583 }
3584
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03003585 ret = bdrv_refresh_perms(bs, tran, errp);
Vladimir Sementsov-Ogievskiy160333e2021-04-28 18:17:56 +03003586out:
3587 tran_finalize(tran, ret);
Kevin Wolf92140b92022-11-18 18:41:04 +01003588 return ret;
3589}
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03003590
Kevin Wolf92140b92022-11-18 18:41:04 +01003591int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
3592 Error **errp)
3593{
3594 int ret;
3595 GLOBAL_STATE_CODE();
3596
Fiona Ebnere66dbda2025-05-30 17:10:48 +02003597 bdrv_drain_all_begin();
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05003598 bdrv_graph_wrlock();
Kevin Wolf92140b92022-11-18 18:41:04 +01003599 ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05003600 bdrv_graph_wrunlock();
Fiona Ebnere66dbda2025-05-30 17:10:48 +02003601 bdrv_drain_all_end();
Vladimir Sementsov-Ogievskiyc0829cb2022-01-24 18:37:41 +01003602
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03003603 return ret;
Fam Zheng8d24cce2014-05-23 21:29:45 +08003604}
3605
Kevin Wolf31ca6d02013-03-28 15:29:24 +01003606/*
3607 * Opens the backing file for a BlockDriverState if not yet open
3608 *
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003609 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
3610 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3611 * itself, all options starting with "${bdref_key}." are considered part of the
3612 * BlockdevRef.
3613 *
3614 * TODO Can this be unified with bdrv_open_image()?
Kevin Wolf31ca6d02013-03-28 15:29:24 +01003615 */
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003616int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
3617 const char *bdref_key, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02003618{
Zhao Liu7b22e052024-03-11 11:37:56 +08003619 ERRP_GUARD();
Max Reitz6b6833c2019-02-01 20:29:15 +01003620 char *backing_filename = NULL;
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003621 char *bdref_key_dot;
3622 const char *reference = NULL;
Kevin Wolf317fc442014-04-25 13:27:34 +02003623 int ret = 0;
Max Reitz998c2012019-02-01 20:29:08 +01003624 bool implicit_backing = false;
Fam Zheng8d24cce2014-05-23 21:29:45 +08003625 BlockDriverState *backing_hd;
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003626 QDict *options;
3627 QDict *tmp_parent_options = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +02003628 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02003629
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003630 GLOBAL_STATE_CODE();
Kevin Wolf004915a2023-10-27 17:53:26 +02003631 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003632
Kevin Wolf760e0062015-06-17 14:55:21 +02003633 if (bs->backing != NULL) {
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003634 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02003635 }
3636
Kevin Wolf31ca6d02013-03-28 15:29:24 +01003637 /* NULL means an empty set of options */
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003638 if (parent_options == NULL) {
3639 tmp_parent_options = qdict_new();
3640 parent_options = tmp_parent_options;
Kevin Wolf31ca6d02013-03-28 15:29:24 +01003641 }
3642
Paolo Bonzini9156df12012-10-18 16:49:17 +02003643 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003644
3645 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3646 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
3647 g_free(bdref_key_dot);
3648
Markus Armbruster129c7d12017-03-30 19:43:12 +02003649 /*
3650 * Caution: while qdict_get_try_str() is fine, getting non-string
3651 * types would require more care. When @parent_options come from
3652 * -blockdev or blockdev_add, its members are typed according to
3653 * the QAPI schema, but when they come from -drive, they're all
3654 * QString.
3655 */
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003656 reference = qdict_get_try_str(parent_options, bdref_key);
3657 if (reference || qdict_haskey(options, "file.filename")) {
Max Reitz6b6833c2019-02-01 20:29:15 +01003658 /* keep backing_filename NULL */
Kevin Wolf1cb6f502013-04-12 20:27:07 +02003659 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003660 qobject_unref(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003661 goto free_exit;
Fam Zhengdbecebd2013-09-22 20:05:06 +08003662 } else {
Max Reitz998c2012019-02-01 20:29:08 +01003663 if (qdict_size(options) == 0) {
3664 /* If the user specifies options that do not modify the
3665 * backing file's behavior, we might still consider it the
3666 * implicit backing file. But it's easier this way, and
3667 * just specifying some of the backing BDS's options is
3668 * only possible with -drive anyway (otherwise the QAPI
3669 * schema forces the user to specify everything). */
3670 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
3671 }
3672
Max Reitz6b6833c2019-02-01 20:29:15 +01003673 backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
Max Reitz9f074292014-11-26 17:20:26 +01003674 if (local_err) {
3675 ret = -EINVAL;
3676 error_propagate(errp, local_err);
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003677 qobject_unref(options);
Max Reitz9f074292014-11-26 17:20:26 +01003678 goto free_exit;
3679 }
Paolo Bonzini9156df12012-10-18 16:49:17 +02003680 }
3681
Kevin Wolf8ee79e72014-06-04 15:09:35 +02003682 if (!bs->drv || !bs->drv->supports_backing) {
3683 ret = -EINVAL;
3684 error_setg(errp, "Driver doesn't support backing files");
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003685 qobject_unref(options);
Kevin Wolf8ee79e72014-06-04 15:09:35 +02003686 goto free_exit;
3687 }
3688
Peter Krempa6bff5972017-10-12 16:14:10 +02003689 if (!reference &&
3690 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
Eric Blake46f5ac22017-04-27 16:58:17 -05003691 qdict_put_str(options, "driver", bs->backing_format);
Paolo Bonzini9156df12012-10-18 16:49:17 +02003692 }
3693
Max Reitz6b6833c2019-02-01 20:29:15 +01003694 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
Kevin Wolf7ead9462024-04-25 14:56:02 +02003695 &child_of_bds, bdrv_backing_role(bs), true,
3696 errp);
Max Reitz5b363932016-05-17 16:41:31 +02003697 if (!backing_hd) {
Paolo Bonzini9156df12012-10-18 16:49:17 +02003698 bs->open_flags |= BDRV_O_NO_BACKING;
Markus Armbrustere43bfd92015-12-18 16:35:15 +01003699 error_prepend(errp, "Could not open backing file: ");
Max Reitz5b363932016-05-17 16:41:31 +02003700 ret = -EINVAL;
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003701 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02003702 }
Kevin Wolfdf581792015-06-15 11:53:47 +02003703
Max Reitz998c2012019-02-01 20:29:08 +01003704 if (implicit_backing) {
3705 bdrv_refresh_filename(backing_hd);
3706 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
3707 backing_hd->filename);
3708 }
3709
Kevin Wolf5db15a52015-09-14 15:33:33 +02003710 /* Hook up the backing file link; drop our reference, bs owns the
3711 * backing_hd reference now */
Vladimir Sementsov-Ogievskiydc9c10a2021-02-02 15:49:47 +03003712 ret = bdrv_set_backing_hd(bs, backing_hd, errp);
Kevin Wolf5db15a52015-09-14 15:33:33 +02003713 bdrv_unref(backing_hd);
Kevin Wolf8aa04542023-06-05 10:57:08 +02003714
Vladimir Sementsov-Ogievskiydc9c10a2021-02-02 15:49:47 +03003715 if (ret < 0) {
Kevin Wolf12fa4af2017-02-17 20:42:32 +01003716 goto free_exit;
3717 }
Peter Feinerd80ac652014-01-08 19:43:25 +00003718
Kevin Wolfd9b7b052015-01-16 18:23:41 +01003719 qdict_del(parent_options, bdref_key);
3720
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003721free_exit:
3722 g_free(backing_filename);
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003723 qobject_unref(tmp_parent_options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003724 return ret;
Paolo Bonzini9156df12012-10-18 16:49:17 +02003725}
3726
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003727static BlockDriverState *
3728bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
Max Reitzbd86fb92020-05-13 13:05:13 +02003729 BlockDriverState *parent, const BdrvChildClass *child_class,
Kevin Wolf7ead9462024-04-25 14:56:02 +02003730 BdrvChildRole child_role, bool allow_none,
3731 bool parse_filename, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01003732{
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003733 BlockDriverState *bs = NULL;
Max Reitzda557aa2013-12-20 19:28:11 +01003734 QDict *image_options;
Max Reitzda557aa2013-12-20 19:28:11 +01003735 char *bdref_key_dot;
3736 const char *reference;
3737
Max Reitzbd86fb92020-05-13 13:05:13 +02003738 assert(child_class != NULL);
Max Reitzf67503e2014-02-18 18:33:05 +01003739
Max Reitzda557aa2013-12-20 19:28:11 +01003740 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3741 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3742 g_free(bdref_key_dot);
3743
Markus Armbruster129c7d12017-03-30 19:43:12 +02003744 /*
3745 * Caution: while qdict_get_try_str() is fine, getting non-string
3746 * types would require more care. When @options come from
3747 * -blockdev or blockdev_add, its members are typed according to
3748 * the QAPI schema, but when they come from -drive, they're all
3749 * QString.
3750 */
Max Reitzda557aa2013-12-20 19:28:11 +01003751 reference = qdict_get_try_str(options, bdref_key);
3752 if (!filename && !reference && !qdict_size(image_options)) {
Kevin Wolfb4b059f2015-06-15 13:24:19 +02003753 if (!allow_none) {
Max Reitzda557aa2013-12-20 19:28:11 +01003754 error_setg(errp, "A block device must be specified for \"%s\"",
3755 bdref_key);
Max Reitzda557aa2013-12-20 19:28:11 +01003756 }
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003757 qobject_unref(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01003758 goto done;
3759 }
3760
Max Reitz5b363932016-05-17 16:41:31 +02003761 bs = bdrv_open_inherit(filename, reference, image_options, 0,
Kevin Wolf7ead9462024-04-25 14:56:02 +02003762 parent, child_class, child_role, parse_filename,
3763 errp);
Max Reitz5b363932016-05-17 16:41:31 +02003764 if (!bs) {
Kevin Wolfdf581792015-06-15 11:53:47 +02003765 goto done;
3766 }
3767
Max Reitzda557aa2013-12-20 19:28:11 +01003768done:
3769 qdict_del(options, bdref_key);
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003770 return bs;
3771}
3772
Kevin Wolf7ead9462024-04-25 14:56:02 +02003773static BdrvChild *bdrv_open_child_common(const char *filename,
3774 QDict *options, const char *bdref_key,
3775 BlockDriverState *parent,
3776 const BdrvChildClass *child_class,
3777 BdrvChildRole child_role,
3778 bool allow_none, bool parse_filename,
3779 Error **errp)
3780{
3781 BlockDriverState *bs;
3782 BdrvChild *child;
3783
3784 GLOBAL_STATE_CODE();
3785
3786 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3787 child_role, allow_none, parse_filename, errp);
3788 if (bs == NULL) {
3789 return NULL;
3790 }
3791
3792 bdrv_graph_wrlock();
3793 child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3794 errp);
3795 bdrv_graph_wrunlock();
3796
3797 return child;
3798}
3799
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003800/*
3801 * Opens a disk image whose options are given as BlockdevRef in another block
3802 * device's options.
3803 *
3804 * If allow_none is true, no image will be opened if filename is false and no
3805 * BlockdevRef is given. NULL will be returned, but errp remains unset.
3806 *
3807 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3808 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3809 * itself, all options starting with "${bdref_key}." are considered part of the
3810 * BlockdevRef.
3811 *
3812 * The BlockdevRef will be removed from the options QDict.
Kevin Wolfaa269ff2023-05-25 14:47:07 +02003813 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003814 * @parent can move to a different AioContext in this function.
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003815 */
3816BdrvChild *bdrv_open_child(const char *filename,
3817 QDict *options, const char *bdref_key,
3818 BlockDriverState *parent,
Max Reitzbd86fb92020-05-13 13:05:13 +02003819 const BdrvChildClass *child_class,
Max Reitz258b7762020-05-13 13:05:15 +02003820 BdrvChildRole child_role,
Kevin Wolf2d6b86a2017-02-17 17:43:59 +01003821 bool allow_none, Error **errp)
3822{
Kevin Wolf7ead9462024-04-25 14:56:02 +02003823 return bdrv_open_child_common(filename, options, bdref_key, parent,
3824 child_class, child_role, allow_none, false,
3825 errp);
Kevin Wolfb4b059f2015-06-15 13:24:19 +02003826}
3827
Max Reitzbd86fb92020-05-13 13:05:13 +02003828/*
Kevin Wolf7ead9462024-04-25 14:56:02 +02003829 * This does mostly the same as bdrv_open_child(), but for opening the primary
3830 * child of a node. A notable difference from bdrv_open_child() is that it
3831 * enables filename parsing for protocol names (including json:).
Kevin Wolfaa269ff2023-05-25 14:47:07 +02003832 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05003833 * @parent can move to a different AioContext in this function.
Vladimir Sementsov-Ogievskiy83930782022-07-26 23:11:21 +03003834 */
3835int bdrv_open_file_child(const char *filename,
3836 QDict *options, const char *bdref_key,
3837 BlockDriverState *parent, Error **errp)
3838{
3839 BdrvChildRole role;
3840
3841 /* commit_top and mirror_top don't use this function */
3842 assert(!parent->drv->filtered_child_is_backing);
Vladimir Sementsov-Ogievskiy83930782022-07-26 23:11:21 +03003843 role = parent->drv->is_filter ?
3844 (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
3845
Kevin Wolf7ead9462024-04-25 14:56:02 +02003846 if (!bdrv_open_child_common(filename, options, bdref_key, parent,
3847 &child_of_bds, role, false, true, errp))
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003848 {
3849 return -EINVAL;
3850 }
Vladimir Sementsov-Ogievskiy83930782022-07-26 23:11:21 +03003851
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03003852 return 0;
Vladimir Sementsov-Ogievskiy83930782022-07-26 23:11:21 +03003853}
3854
3855/*
Max Reitzbd86fb92020-05-13 13:05:13 +02003856 * TODO Future callers may need to specify parent/child_class in order for
3857 * option inheritance to work. Existing callers use it for the root node.
3858 */
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003859BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3860{
3861 BlockDriverState *bs = NULL;
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003862 QObject *obj = NULL;
3863 QDict *qdict = NULL;
3864 const char *reference = NULL;
3865 Visitor *v = NULL;
3866
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05003867 GLOBAL_STATE_CODE();
3868
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003869 if (ref->type == QTYPE_QSTRING) {
3870 reference = ref->u.reference;
3871 } else {
3872 BlockdevOptions *options = &ref->u.definition;
3873 assert(ref->type == QTYPE_QDICT);
3874
3875 v = qobject_output_visitor_new(&obj);
Markus Armbruster1f584242020-04-24 10:43:35 +02003876 visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003877 visit_complete(v, &obj);
3878
Max Reitz7dc847e2018-02-24 16:40:29 +01003879 qdict = qobject_to(QDict, obj);
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003880 qdict_flatten(qdict);
3881
3882 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3883 * compatibility with other callers) rather than what we want as the
3884 * real defaults. Apply the defaults here instead. */
3885 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3886 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3887 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
Kevin Wolfe35bdc12018-10-05 18:57:40 +02003888 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3889
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003890 }
3891
Kevin Wolf7ead9462024-04-25 14:56:02 +02003892 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false,
3893 errp);
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003894 obj = NULL;
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003895 qobject_unref(obj);
Kevin Wolfe1d74bc2018-01-10 15:52:33 +01003896 visit_free(v);
3897 return bs;
3898}
3899
Max Reitz66836182016-05-17 16:41:27 +02003900static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3901 int flags,
3902 QDict *snapshot_options,
3903 Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02003904{
Zhao Liu7b22e052024-03-11 11:37:56 +08003905 ERRP_GUARD();
Bin Meng69fbfff2022-10-10 12:04:31 +08003906 g_autofree char *tmp_filename = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02003907 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08003908 QemuOpts *opts = NULL;
Eric Blakeff6ed712017-04-27 16:58:18 -05003909 BlockDriverState *bs_snapshot = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02003910 int ret;
3911
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05003912 GLOBAL_STATE_CODE();
3913
Kevin Wolfb9988752014-04-03 12:09:34 +02003914 /* if snapshot, we create a temporary backing file and open it
3915 instead of opening 'filename' directly */
3916
3917 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02003918 total_size = bdrv_getlength(bs);
Kevin Wolff665f012023-06-05 10:57:07 +02003919
Kevin Wolff1877432014-04-04 17:07:19 +02003920 if (total_size < 0) {
3921 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003922 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02003923 }
Kevin Wolfb9988752014-04-03 12:09:34 +02003924
3925 /* Create the temporary image */
Bin Meng69fbfff2022-10-10 12:04:31 +08003926 tmp_filename = create_tmp_file(errp);
3927 if (!tmp_filename) {
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003928 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02003929 }
3930
Max Reitzef810432014-12-02 18:32:42 +01003931 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08003932 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01003933 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Markus Armbrustere43bfd92015-12-18 16:35:15 +01003934 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
Chunyan Liu83d05212014-06-05 17:20:51 +08003935 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02003936 if (ret < 0) {
Markus Armbrustere43bfd92015-12-18 16:35:15 +01003937 error_prepend(errp, "Could not create temporary overlay '%s': ",
3938 tmp_filename);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003939 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02003940 }
3941
Kevin Wolf73176be2016-03-07 13:02:15 +01003942 /* Prepare options QDict for the temporary file */
Eric Blake46f5ac22017-04-27 16:58:17 -05003943 qdict_put_str(snapshot_options, "file.driver", "file");
3944 qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3945 qdict_put_str(snapshot_options, "driver", "qcow2");
Kevin Wolfb9988752014-04-03 12:09:34 +02003946
Max Reitz5b363932016-05-17 16:41:31 +02003947 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
Kevin Wolf73176be2016-03-07 13:02:15 +01003948 snapshot_options = NULL;
Max Reitz5b363932016-05-17 16:41:31 +02003949 if (!bs_snapshot) {
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003950 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02003951 }
3952
Vladimir Sementsov-Ogievskiy934aee12021-02-02 15:49:44 +03003953 ret = bdrv_append(bs_snapshot, bs, errp);
3954 if (ret < 0) {
Eric Blakeff6ed712017-04-27 16:58:18 -05003955 bs_snapshot = NULL;
Kevin Wolfb2c28322017-02-20 12:46:42 +01003956 goto out;
3957 }
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02003958
3959out:
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003960 qobject_unref(snapshot_options);
Eric Blakeff6ed712017-04-27 16:58:18 -05003961 return bs_snapshot;
Kevin Wolfb9988752014-04-03 12:09:34 +02003962}
3963
Max Reitzda557aa2013-12-20 19:28:11 +01003964/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02003965 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01003966 *
3967 * options is a QDict of options to pass to the block drivers, or NULL for an
3968 * empty set of options. The reference to the QDict belongs to the block layer
3969 * after the call (even on failure), so if the caller intends to reuse the
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02003970 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01003971 *
3972 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
3973 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01003974 *
3975 * The reference parameter may be used to specify an existing block device which
3976 * should be opened. If specified, neither options nor a filename may be given,
3977 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02003978 */
Kevin Wolf32192302023-01-26 18:24:32 +01003979static BlockDriverState * no_coroutine_fn
3980bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
3981 int flags, BlockDriverState *parent,
3982 const BdrvChildClass *child_class, BdrvChildRole child_role,
Kevin Wolf7ead9462024-04-25 14:56:02 +02003983 bool parse_filename, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00003984{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02003985 int ret;
Kevin Wolf5696c6e2017-02-17 18:39:24 +01003986 BlockBackend *file = NULL;
Kevin Wolf9a4f4c32015-06-16 14:19:22 +02003987 BlockDriverState *bs;
Max Reitzce343772015-08-26 19:47:50 +02003988 BlockDriver *drv = NULL;
Alberto Garcia2f624b82018-06-29 14:37:00 +03003989 BdrvChild *child;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02003990 const char *drvname;
Alberto Garcia3e8c2e52015-10-26 14:27:15 +02003991 const char *backing;
Max Reitz34b5d2c2013-09-05 14:45:29 +02003992 Error *local_err = NULL;
Kevin Wolf73176be2016-03-07 13:02:15 +01003993 QDict *snapshot_options = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02003994 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00003995
Max Reitzbd86fb92020-05-13 13:05:13 +02003996 assert(!child_class || !flags);
3997 assert(!child_class == !parent);
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05003998 GLOBAL_STATE_CODE();
Kevin Wolf32192302023-01-26 18:24:32 +01003999 assert(!qemu_in_coroutine());
Max Reitzf67503e2014-02-18 18:33:05 +01004000
Kevin Wolf356f4ef2023-09-11 11:46:15 +02004001 /* TODO We'll eventually have to take a writer lock in this function */
4002 GRAPH_RDLOCK_GUARD_MAINLOOP();
4003
Max Reitzddf56362014-02-18 18:33:06 +01004004 if (reference) {
4005 bool options_non_empty = options ? qdict_size(options) : false;
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02004006 qobject_unref(options);
Max Reitzddf56362014-02-18 18:33:06 +01004007
Max Reitzddf56362014-02-18 18:33:06 +01004008 if (filename || options_non_empty) {
4009 error_setg(errp, "Cannot reference an existing block device with "
4010 "additional options or a new filename");
Max Reitz5b363932016-05-17 16:41:31 +02004011 return NULL;
Max Reitzddf56362014-02-18 18:33:06 +01004012 }
4013
4014 bs = bdrv_lookup_bs(reference, reference, errp);
4015 if (!bs) {
Max Reitz5b363932016-05-17 16:41:31 +02004016 return NULL;
Max Reitzddf56362014-02-18 18:33:06 +01004017 }
Kevin Wolf76b22322016-04-04 17:11:13 +02004018
Max Reitzddf56362014-02-18 18:33:06 +01004019 bdrv_ref(bs);
Max Reitz5b363932016-05-17 16:41:31 +02004020 return bs;
Max Reitzddf56362014-02-18 18:33:06 +01004021 }
4022
Max Reitz5b363932016-05-17 16:41:31 +02004023 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01004024
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01004025 /* NULL means an empty set of options */
4026 if (options == NULL) {
4027 options = qdict_new();
4028 }
4029
Kevin Wolf145f5982015-05-08 16:15:03 +02004030 /* json: syntax counts as explicit options, as if in the QDict */
Kevin Wolf7ead9462024-04-25 14:56:02 +02004031 if (parse_filename) {
4032 parse_json_protocol(options, &filename, &local_err);
4033 if (local_err) {
4034 goto fail;
4035 }
Kevin Wolfde3b53f2015-10-29 15:24:41 +01004036 }
4037
Kevin Wolf145f5982015-05-08 16:15:03 +02004038 bs->explicit_options = qdict_clone_shallow(options);
4039
Max Reitzbd86fb92020-05-13 13:05:13 +02004040 if (child_class) {
Max Reitz3cdc69d2020-05-13 13:05:18 +02004041 bool parent_is_format;
4042
4043 if (parent->drv) {
4044 parent_is_format = parent->drv->is_format;
4045 } else {
4046 /*
4047 * parent->drv is not set yet because this node is opened for
4048 * (potential) format probing. That means that @parent is going
4049 * to be a format node.
4050 */
4051 parent_is_format = true;
4052 }
4053
Kevin Wolfbddcec32015-04-09 18:47:50 +02004054 bs->inherits_from = parent;
Max Reitz3cdc69d2020-05-13 13:05:18 +02004055 child_class->inherit_options(child_role, parent_is_format,
4056 &flags, options,
Max Reitzbd86fb92020-05-13 13:05:13 +02004057 parent->open_flags, parent->options);
Kevin Wolff3930ed2015-04-08 13:43:47 +02004058 }
4059
Kevin Wolf7ead9462024-04-25 14:56:02 +02004060 ret = bdrv_fill_options(&options, filename, &flags, parse_filename,
4061 &local_err);
Philippe Mathieu-Daudédfde4832020-04-22 15:31:44 +02004062 if (ret < 0) {
Kevin Wolf462f5bc2014-05-26 11:39:55 +02004063 goto fail;
4064 }
4065
Markus Armbruster129c7d12017-03-30 19:43:12 +02004066 /*
4067 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
4068 * Caution: getting a boolean member of @options requires care.
4069 * When @options come from -blockdev or blockdev_add, members are
4070 * typed according to the QAPI schema, but when they come from
4071 * -drive, they're all QString.
4072 */
Alberto Garciaf87a0e22016-09-15 17:53:02 +03004073 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
4074 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
4075 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
4076 } else {
4077 flags &= ~BDRV_O_RDWR;
Alberto Garcia14499ea2016-09-15 17:53:00 +03004078 }
4079
4080 if (flags & BDRV_O_SNAPSHOT) {
4081 snapshot_options = qdict_new();
4082 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
4083 flags, options);
Alberto Garciaf87a0e22016-09-15 17:53:02 +03004084 /* Let bdrv_backing_options() override "read-only" */
4085 qdict_del(options, BDRV_OPT_READ_ONLY);
Max Reitz00ff7ff2020-05-13 13:05:21 +02004086 bdrv_inherited_options(BDRV_CHILD_COW, true,
4087 &flags, options, flags, options);
Alberto Garcia14499ea2016-09-15 17:53:00 +03004088 }
4089
Kevin Wolf62392eb2015-04-24 16:38:02 +02004090 bs->open_flags = flags;
4091 bs->options = options;
4092 options = qdict_clone_shallow(options);
4093
Kevin Wolf76c591b2014-06-04 14:19:44 +02004094 /* Find the right image format driver */
Markus Armbruster129c7d12017-03-30 19:43:12 +02004095 /* See cautionary note on accessing @options above */
Kevin Wolf76c591b2014-06-04 14:19:44 +02004096 drvname = qdict_get_try_str(options, "driver");
4097 if (drvname) {
4098 drv = bdrv_find_format(drvname);
Kevin Wolf76c591b2014-06-04 14:19:44 +02004099 if (!drv) {
4100 error_setg(errp, "Unknown driver: '%s'", drvname);
Kevin Wolf76c591b2014-06-04 14:19:44 +02004101 goto fail;
4102 }
4103 }
4104
4105 assert(drvname || !(flags & BDRV_O_PROTOCOL));
Kevin Wolf76c591b2014-06-04 14:19:44 +02004106
Markus Armbruster129c7d12017-03-30 19:43:12 +02004107 /* See cautionary note on accessing @options above */
Alberto Garcia3e8c2e52015-10-26 14:27:15 +02004108 backing = qdict_get_try_str(options, "backing");
Max Reitze59a0cf2018-02-24 16:40:32 +01004109 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
4110 (backing && *backing == '\0'))
4111 {
Max Reitz4f7be282018-02-24 16:40:33 +01004112 if (backing) {
4113 warn_report("Use of \"backing\": \"\" is deprecated; "
4114 "use \"backing\": null instead");
4115 }
Alberto Garcia3e8c2e52015-10-26 14:27:15 +02004116 flags |= BDRV_O_NO_BACKING;
Kevin Wolfae0f57f2019-11-08 09:36:35 +01004117 qdict_del(bs->explicit_options, "backing");
4118 qdict_del(bs->options, "backing");
Alberto Garcia3e8c2e52015-10-26 14:27:15 +02004119 qdict_del(options, "backing");
4120 }
4121
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004122 /* Open image file without format layer. This BlockBackend is only used for
Kevin Wolf4e4bf5c2016-12-16 18:52:37 +01004123 * probing, the block drivers will do their own bdrv_open_child() for the
4124 * same BDS, which is why we put the node name back into options. */
Kevin Wolff4788ad2014-06-03 16:44:19 +02004125 if ((flags & BDRV_O_PROTOCOL) == 0) {
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004126 BlockDriverState *file_bs;
4127
4128 file_bs = bdrv_open_child_bs(filename, options, "file", bs,
Max Reitz58944402020-05-13 13:05:37 +02004129 &child_of_bds, BDRV_CHILD_IMAGE,
Kevin Wolf7ead9462024-04-25 14:56:02 +02004130 true, true, &local_err);
Kevin Wolf1fdd6932015-06-15 14:11:51 +02004131 if (local_err) {
Max Reitz5469a2a2014-02-18 18:33:10 +01004132 goto fail;
4133 }
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004134 if (file_bs != NULL) {
Kevin Wolfdacaa162017-11-20 14:59:13 +01004135 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
4136 * looking at the header to guess the image format. This works even
4137 * in cases where a guest would not see a consistent state. */
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05004138 AioContext *ctx = bdrv_get_aio_context(file_bs);
Kevin Wolff665f012023-06-05 10:57:07 +02004139 file = blk_new(ctx, 0, BLK_PERM_ALL);
Kevin Wolfd7086422017-01-13 19:02:32 +01004140 blk_insert_bs(file, file_bs, &local_err);
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004141 bdrv_unref(file_bs);
Kevin Wolff665f012023-06-05 10:57:07 +02004142
Kevin Wolfd7086422017-01-13 19:02:32 +01004143 if (local_err) {
4144 goto fail;
4145 }
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004146
Eric Blake46f5ac22017-04-27 16:58:17 -05004147 qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
Kevin Wolf4e4bf5c2016-12-16 18:52:37 +01004148 }
Max Reitz5469a2a2014-02-18 18:33:10 +01004149 }
4150
Kevin Wolf76c591b2014-06-04 14:19:44 +02004151 /* Image format probing */
Kevin Wolf38f3ef52014-11-20 16:27:12 +01004152 bs->probed = !drv;
Kevin Wolf76c591b2014-06-04 14:19:44 +02004153 if (!drv && file) {
Kevin Wolfcf2ab8f2016-06-20 18:24:02 +02004154 ret = find_image_format(file, filename, &drv, &local_err);
Kevin Wolf17b005f2014-05-27 10:50:29 +02004155 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02004156 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01004157 }
Kevin Wolf62392eb2015-04-24 16:38:02 +02004158 /*
4159 * This option update would logically belong in bdrv_fill_options(),
4160 * but we first need to open bs->file for the probing to work, while
4161 * opening bs->file already requires the (mostly) final set of options
4162 * so that cache mode etc. can be inherited.
4163 *
4164 * Adding the driver later is somewhat ugly, but it's not an option
4165 * that would ever be inherited, so it's correct. We just need to make
4166 * sure to update both bs->options (which has the full effective
4167 * options for bs) and options (which has file.* already removed).
4168 */
Eric Blake46f5ac22017-04-27 16:58:17 -05004169 qdict_put_str(bs->options, "driver", drv->format_name);
4170 qdict_put_str(options, "driver", drv->format_name);
Kevin Wolf76c591b2014-06-04 14:19:44 +02004171 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02004172 error_setg(errp, "Must specify either driver or file");
Kevin Wolf8bfea152014-04-11 19:16:36 +02004173 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01004174 }
4175
Max Reitz53a29512015-03-19 14:53:16 -04004176 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
Paolo Bonzini41770f62022-11-24 16:21:18 +01004177 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->protocol_name);
Max Reitz53a29512015-03-19 14:53:16 -04004178 /* file must be NULL if a protocol BDS is about to be created
4179 * (the inverse results in an error message from bdrv_open_common()) */
4180 assert(!(flags & BDRV_O_PROTOCOL) || !file);
4181
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004182 /* Open the image */
Kevin Wolf82dc8b42016-01-11 19:07:50 +01004183 ret = bdrv_open_common(bs, file, options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004184 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02004185 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01004186 }
4187
Kevin Wolf4e4bf5c2016-12-16 18:52:37 +01004188 if (file) {
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004189 blk_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01004190 file = NULL;
4191 }
4192
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004193 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02004194 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolfd9b7b052015-01-16 18:23:41 +01004195 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004196 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004197 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004198 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004199 }
4200
Alberto Garcia50196d72018-09-06 12:37:03 +03004201 /* Remove all children options and references
4202 * from bs->options and bs->explicit_options */
Alberto Garcia2f624b82018-06-29 14:37:00 +03004203 QLIST_FOREACH(child, &bs->children, next) {
4204 char *child_key_dot;
4205 child_key_dot = g_strdup_printf("%s.", child->name);
4206 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
4207 qdict_extract_subqdict(bs->options, NULL, child_key_dot);
Alberto Garcia50196d72018-09-06 12:37:03 +03004208 qdict_del(bs->explicit_options, child->name);
4209 qdict_del(bs->options, child->name);
Alberto Garcia2f624b82018-06-29 14:37:00 +03004210 g_free(child_key_dot);
4211 }
4212
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004213 /* Check if any unknown options were used */
Paolo Bonzini7ad27572017-01-04 15:59:14 +01004214 if (qdict_size(options) != 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004215 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01004216 if (flags & BDRV_O_PROTOCOL) {
4217 error_setg(errp, "Block protocol '%s' doesn't support the option "
4218 "'%s'", drv->format_name, entry->key);
4219 } else {
Max Reitzd0e46a52016-03-16 19:54:34 +01004220 error_setg(errp,
4221 "Block format '%s' does not support the option '%s'",
4222 drv->format_name, entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01004223 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004224
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004225 goto close_and_fail;
4226 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004227
Daniel P. Berrangec01c2142017-06-23 17:24:16 +01004228 bdrv_parent_cb_change_media(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004229
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02004230 qobject_unref(options);
Alberto Garcia8961be32018-09-06 17:25:41 +03004231 options = NULL;
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02004232
4233 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
4234 * temporary snapshot afterwards. */
4235 if (snapshot_flags) {
Max Reitz66836182016-05-17 16:41:27 +02004236 BlockDriverState *snapshot_bs;
4237 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
4238 snapshot_options, &local_err);
Kevin Wolf73176be2016-03-07 13:02:15 +01004239 snapshot_options = NULL;
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02004240 if (local_err) {
4241 goto close_and_fail;
4242 }
Max Reitz5b363932016-05-17 16:41:31 +02004243 /* We are not going to return bs but the overlay on top of it
4244 * (snapshot_bs); thus, we have to drop the strong reference to bs
4245 * (which we obtained by calling bdrv_new()). bs will not be deleted,
4246 * though, because the overlay still has a reference to it. */
4247 bdrv_unref(bs);
4248 bs = snapshot_bs;
Max Reitz66836182016-05-17 16:41:27 +02004249 }
4250
Max Reitz5b363932016-05-17 16:41:31 +02004251 return bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004252
Kevin Wolf8bfea152014-04-11 19:16:36 +02004253fail:
Kevin Wolf5696c6e2017-02-17 18:39:24 +01004254 blk_unref(file);
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02004255 qobject_unref(snapshot_options);
4256 qobject_unref(bs->explicit_options);
4257 qobject_unref(bs->options);
4258 qobject_unref(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01004259 bs->options = NULL;
Manos Pitsidianakis998cbd62017-07-14 17:35:47 +03004260 bs->explicit_options = NULL;
Max Reitz5b363932016-05-17 16:41:31 +02004261 bdrv_unref(bs);
Eduardo Habkost621ff942016-06-13 18:57:56 -03004262 error_propagate(errp, local_err);
Max Reitz5b363932016-05-17 16:41:31 +02004263 return NULL;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01004264
Kevin Wolfb6ad4912013-03-15 10:35:04 +01004265close_and_fail:
Max Reitz5b363932016-05-17 16:41:31 +02004266 bdrv_unref(bs);
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02004267 qobject_unref(snapshot_options);
4268 qobject_unref(options);
Eduardo Habkost621ff942016-06-13 18:57:56 -03004269 error_propagate(errp, local_err);
Max Reitz5b363932016-05-17 16:41:31 +02004270 return NULL;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02004271}
4272
Max Reitz5b363932016-05-17 16:41:31 +02004273BlockDriverState *bdrv_open(const char *filename, const char *reference,
4274 QDict *options, int flags, Error **errp)
Kevin Wolff3930ed2015-04-08 13:43:47 +02004275{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004276 GLOBAL_STATE_CODE();
4277
Max Reitz5b363932016-05-17 16:41:31 +02004278 return bdrv_open_inherit(filename, reference, options, flags, NULL,
Kevin Wolf7ead9462024-04-25 14:56:02 +02004279 NULL, 0, true, errp);
Kevin Wolff3930ed2015-04-08 13:43:47 +02004280}
4281
/*
 * Return true if the NULL-terminated @list contains @str.
 *
 * Strings are compared with strcmp(), i.e. the match is exact and
 * case-sensitive. A NULL @str or a NULL @list never matches, so callers
 * may pass an optional list without checking it first.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *item;

    if (!str || !list) {
        return false;
    }

    for (item = list; *item != NULL; item++) {
        if (strcmp(str, *item) == 0) {
            return true;
        }
    }

    return false;
}
4295
4296/*
4297 * Check that every option set in @bs->options is also set in
4298 * @new_opts.
4299 *
4300 * Options listed in the common_options list and in
4301 * @bs->drv->mutable_opts are skipped.
4302 *
4303 * Return 0 on success, otherwise return -EINVAL and set @errp.
4304 */
4305static int bdrv_reset_options_allowed(BlockDriverState *bs,
4306 const QDict *new_opts, Error **errp)
4307{
4308 const QDictEntry *e;
4309 /* These options are common to all block drivers and are handled
4310 * in bdrv_reopen_prepare() so they can be left out of @new_opts */
4311 const char *const common_options[] = {
4312 "node-name", "discard", "cache.direct", "cache.no-flush",
4313 "read-only", "auto-read-only", "detect-zeroes", NULL
4314 };
4315
4316 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
4317 if (!qdict_haskey(new_opts, e->key) &&
4318 !is_str_in_list(e->key, common_options) &&
4319 !is_str_in_list(e->key, bs->drv->mutable_opts)) {
4320 error_setg(errp, "Option '%s' cannot be reset "
4321 "to its default value", e->key);
4322 return -EINVAL;
4323 }
4324 }
4325
4326 return 0;
4327}
4328
Jeff Codye971aa12012-09-20 15:13:19 -04004329/*
Alberto Garciacb828c32019-03-12 18:48:47 +02004330 * Returns true if @child can be reached recursively from @bs
4331 */
Kevin Wolfce433d22023-09-29 16:51:43 +02004332static bool GRAPH_RDLOCK
4333bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child)
Alberto Garciacb828c32019-03-12 18:48:47 +02004334{
4335 BdrvChild *c;
4336
4337 if (bs == child) {
4338 return true;
4339 }
4340
4341 QLIST_FOREACH(c, &bs->children, next) {
4342 if (bdrv_recurse_has_child(c->bs, child)) {
4343 return true;
4344 }
4345 }
4346
4347 return false;
4348}
4349
4350/*
Jeff Codye971aa12012-09-20 15:13:19 -04004351 * Adds a BlockDriverState to a simple queue for an atomic, transactional
4352 * reopen of multiple devices.
4353 *
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004354 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
Jeff Codye971aa12012-09-20 15:13:19 -04004355 * already performed, or alternatively may be NULL a new BlockReopenQueue will
4356 * be created and initialized. This newly created BlockReopenQueue should be
4357 * passed back in for subsequent calls that are intended to be of the same
4358 * atomic 'set'.
4359 *
4360 * bs is the BlockDriverState to add to the reopen queue.
4361 *
Kevin Wolf4d2cb092015-04-10 17:50:50 +02004362 * options contains the changed options for the associated bs
4363 * (the BlockReopenQueue takes ownership)
4364 *
Jeff Codye971aa12012-09-20 15:13:19 -04004365 * flags contains the open flags for the associated bs
4366 *
4367 * returns a pointer to bs_queue, which is either the newly allocated
4368 * bs_queue, or the existing bs_queue being used.
4369 *
Fiona Ebnere1d681b2025-05-30 17:10:39 +02004370 * bs must be drained.
Jeff Codye971aa12012-09-20 15:13:19 -04004371 */
Kevin Wolfce433d22023-09-29 16:51:43 +02004372static BlockReopenQueue * GRAPH_RDLOCK
4373bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
4374 QDict *options, const BdrvChildClass *klass,
4375 BdrvChildRole role, bool parent_is_format,
4376 QDict *parent_options, int parent_flags,
4377 bool keep_old_opts)
Jeff Codye971aa12012-09-20 15:13:19 -04004378{
4379 assert(bs != NULL);
4380
4381 BlockReopenQueueEntry *bs_entry;
Kevin Wolf67251a32015-04-09 18:54:04 +02004382 BdrvChild *child;
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004383 QDict *old_options, *explicit_options, *options_copy;
4384 int flags;
4385 QemuOpts *opts;
Kevin Wolf67251a32015-04-09 18:54:04 +02004386
Emanuele Giuseppe Espositof0c28322022-03-03 10:16:13 -05004387 GLOBAL_STATE_CODE();
Kevin Wolf1a63a902017-12-06 20:24:44 +01004388
Fiona Ebnere1d681b2025-05-30 17:10:39 +02004389 assert(bs->quiesce_counter > 0);
Kevin Wolfd22933a2022-11-18 18:41:02 +01004390
Jeff Codye971aa12012-09-20 15:13:19 -04004391 if (bs_queue == NULL) {
4392 bs_queue = g_new0(BlockReopenQueue, 1);
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004393 QTAILQ_INIT(bs_queue);
Jeff Codye971aa12012-09-20 15:13:19 -04004394 }
4395
Kevin Wolf4d2cb092015-04-10 17:50:50 +02004396 if (!options) {
4397 options = qdict_new();
4398 }
4399
Alberto Garcia5b7ba052016-09-15 17:53:03 +03004400 /* Check if this BlockDriverState is already in the queue */
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004401 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
Alberto Garcia5b7ba052016-09-15 17:53:03 +03004402 if (bs == bs_entry->state.bs) {
4403 break;
4404 }
4405 }
4406
Kevin Wolf28518102015-05-08 17:07:31 +02004407 /*
4408 * Precedence of options:
4409 * 1. Explicitly passed in options (highest)
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004410 * 2. Retained from explicitly set options of bs
4411 * 3. Inherited from parent node
4412 * 4. Retained from effective options of bs
Kevin Wolf28518102015-05-08 17:07:31 +02004413 */
4414
Kevin Wolf145f5982015-05-08 16:15:03 +02004415 /* Old explicitly set values (don't overwrite by inherited value) */
Alberto Garcia077e8e22019-03-12 18:48:44 +02004416 if (bs_entry || keep_old_opts) {
4417 old_options = qdict_clone_shallow(bs_entry ?
4418 bs_entry->state.explicit_options :
4419 bs->explicit_options);
4420 bdrv_join_options(bs, options, old_options);
4421 qobject_unref(old_options);
Alberto Garcia5b7ba052016-09-15 17:53:03 +03004422 }
Kevin Wolf145f5982015-05-08 16:15:03 +02004423
4424 explicit_options = qdict_clone_shallow(options);
4425
Kevin Wolf28518102015-05-08 17:07:31 +02004426 /* Inherit from parent node */
4427 if (parent_options) {
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004428 flags = 0;
Max Reitz3cdc69d2020-05-13 13:05:18 +02004429 klass->inherit_options(role, parent_is_format, &flags, options,
Max Reitz272c02e2020-05-13 13:05:17 +02004430 parent_flags, parent_options);
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004431 } else {
4432 flags = bdrv_get_flags(bs);
Kevin Wolf28518102015-05-08 17:07:31 +02004433 }
4434
Alberto Garcia077e8e22019-03-12 18:48:44 +02004435 if (keep_old_opts) {
4436 /* Old values are used for options that aren't set yet */
4437 old_options = qdict_clone_shallow(bs->options);
4438 bdrv_join_options(bs, options, old_options);
4439 qobject_unref(old_options);
4440 }
Kevin Wolf4d2cb092015-04-10 17:50:50 +02004441
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004442 /* We have the final set of options so let's update the flags */
4443 options_copy = qdict_clone_shallow(options);
4444 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4445 qemu_opts_absorb_qdict(opts, options_copy, NULL);
4446 update_flags_from_options(&flags, opts);
4447 qemu_opts_del(opts);
4448 qobject_unref(options_copy);
4449
Kevin Wolffd452022017-08-03 17:02:59 +02004450 /* bdrv_open_inherit() sets and clears some additional flags internally */
Kevin Wolff1f25a22014-04-25 19:04:55 +02004451 flags &= ~BDRV_O_PROTOCOL;
Kevin Wolffd452022017-08-03 17:02:59 +02004452 if (flags & BDRV_O_RDWR) {
4453 flags |= BDRV_O_ALLOW_RDWR;
4454 }
Kevin Wolff1f25a22014-04-25 19:04:55 +02004455
Kevin Wolf1857c972017-09-14 14:53:46 +02004456 if (!bs_entry) {
4457 bs_entry = g_new0(BlockReopenQueueEntry, 1);
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004458 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
Kevin Wolf1857c972017-09-14 14:53:46 +02004459 } else {
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02004460 qobject_unref(bs_entry->state.options);
4461 qobject_unref(bs_entry->state.explicit_options);
Kevin Wolf1857c972017-09-14 14:53:46 +02004462 }
4463
4464 bs_entry->state.bs = bs;
4465 bs_entry->state.options = options;
4466 bs_entry->state.explicit_options = explicit_options;
4467 bs_entry->state.flags = flags;
4468
Alberto Garcia85466322019-03-12 18:48:45 +02004469 /*
4470 * If keep_old_opts is false then it means that unspecified
4471 * options must be reset to their original value. We don't allow
4472 * resetting 'backing' but we need to know if the option is
4473 * missing in order to decide if we have to return an error.
4474 */
4475 if (!keep_old_opts) {
4476 bs_entry->state.backing_missing =
4477 !qdict_haskey(options, "backing") &&
4478 !qdict_haskey(options, "backing.driver");
4479 }
4480
Kevin Wolf67251a32015-04-09 18:54:04 +02004481 QLIST_FOREACH(child, &bs->children, next) {
Alberto Garcia85466322019-03-12 18:48:45 +02004482 QDict *new_child_options = NULL;
4483 bool child_keep_old = keep_old_opts;
Kevin Wolf67251a32015-04-09 18:54:04 +02004484
Kevin Wolf4c9dfe52015-05-08 15:14:15 +02004485 /* reopen can only change the options of block devices that were
4486 * implicitly created and inherited options. For other (referenced)
4487 * block devices, a syntax like "backing.foo" results in an error. */
Kevin Wolf67251a32015-04-09 18:54:04 +02004488 if (child->bs->inherits_from != bs) {
4489 continue;
4490 }
4491
Alberto Garcia85466322019-03-12 18:48:45 +02004492 /* Check if the options contain a child reference */
4493 if (qdict_haskey(options, child->name)) {
4494 const char *childref = qdict_get_try_str(options, child->name);
4495 /*
4496 * The current child must not be reopened if the child
4497 * reference is null or points to a different node.
4498 */
4499 if (g_strcmp0(childref, child->bs->node_name)) {
4500 continue;
4501 }
4502 /*
4503 * If the child reference points to the current child then
4504 * reopen it with its existing set of options (note that
4505 * it can still inherit new options from the parent).
4506 */
4507 child_keep_old = true;
4508 } else {
4509 /* Extract child options ("child-name.*") */
4510 char *child_key_dot = g_strdup_printf("%s.", child->name);
4511 qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
4512 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
4513 g_free(child_key_dot);
4514 }
Kevin Wolf4c9dfe52015-05-08 15:14:15 +02004515
Alberto Garcia9aa09dd2018-11-12 16:00:45 +02004516 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
Max Reitz3cdc69d2020-05-13 13:05:18 +02004517 child->klass, child->role, bs->drv->is_format,
4518 options, flags, child_keep_old);
Jeff Codye971aa12012-09-20 15:13:19 -04004519 }
4520
Jeff Codye971aa12012-09-20 15:13:19 -04004521 return bs_queue;
4522}
4523
Kevin Wolf28518102015-05-08 17:07:31 +02004524BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
4525 BlockDriverState *bs,
Alberto Garcia077e8e22019-03-12 18:48:44 +02004526 QDict *options, bool keep_old_opts)
Kevin Wolf28518102015-05-08 17:07:31 +02004527{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004528 GLOBAL_STATE_CODE();
Fiona Ebnere1d681b2025-05-30 17:10:39 +02004529
4530 if (bs_queue == NULL) {
4531 /* Paired with bdrv_drain_all_end() in bdrv_reopen_queue_free(). */
4532 bdrv_drain_all_begin();
4533 }
4534
Kevin Wolfce433d22023-09-29 16:51:43 +02004535 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004536
Max Reitz3cdc69d2020-05-13 13:05:18 +02004537 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
4538 NULL, 0, keep_old_opts);
Kevin Wolf28518102015-05-08 17:07:31 +02004539}
4540
Alberto Garciaab5b52282021-07-08 13:47:05 +02004541void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
4542{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004543 GLOBAL_STATE_CODE();
Alberto Garciaab5b52282021-07-08 13:47:05 +02004544 if (bs_queue) {
4545 BlockReopenQueueEntry *bs_entry, *next;
4546 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4547 qobject_unref(bs_entry->state.explicit_options);
4548 qobject_unref(bs_entry->state.options);
4549 g_free(bs_entry);
4550 }
4551 g_free(bs_queue);
Fiona Ebnere1d681b2025-05-30 17:10:39 +02004552
4553 /* Paired with bdrv_drain_all_begin() in bdrv_reopen_queue(). */
4554 bdrv_drain_all_end();
Alberto Garciaab5b52282021-07-08 13:47:05 +02004555 }
4556}
4557
Jeff Codye971aa12012-09-20 15:13:19 -04004558/*
4559 * Reopen multiple BlockDriverStates atomically & transactionally.
4560 *
4561 * The queue passed in (bs_queue) must have been built up previous
4562 * via bdrv_reopen_queue().
4563 *
4564 * Reopens all BDS specified in the queue, with the appropriate
4565 * flags. All devices are prepared for reopen, and failure of any
Stefan Weil50d6a8a2018-07-12 21:51:20 +02004566 * device will cause all device changes to be abandoned, and intermediate
Jeff Codye971aa12012-09-20 15:13:19 -04004567 * data cleaned up.
4568 *
4569 * If all devices prepare successfully, then the changes are committed
4570 * to all devices.
4571 *
Kevin Wolf1a63a902017-12-06 20:24:44 +01004572 * All affected nodes must be drained between bdrv_reopen_queue() and
4573 * bdrv_reopen_multiple().
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004574 *
4575 * To be called from the main thread, with all other AioContexts unlocked.
Jeff Codye971aa12012-09-20 15:13:19 -04004576 */
Alberto Garcia5019aec2019-03-12 18:48:50 +02004577int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
Jeff Codye971aa12012-09-20 15:13:19 -04004578{
4579 int ret = -1;
4580 BlockReopenQueueEntry *bs_entry, *next;
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004581 Transaction *tran = tran_new();
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004582 g_autoptr(GSList) refresh_list = NULL;
Jeff Codye971aa12012-09-20 15:13:19 -04004583
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004584 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
Jeff Codye971aa12012-09-20 15:13:19 -04004585 assert(bs_queue != NULL);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05004586 GLOBAL_STATE_CODE();
Jeff Codye971aa12012-09-20 15:13:19 -04004587
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004588 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
Vladimir Sementsov-Ogievskiya2aabf82021-04-28 18:17:57 +03004589 ret = bdrv_flush(bs_entry->state.bs);
4590 if (ret < 0) {
4591 error_setg_errno(errp, -ret, "Error flushing drive");
Kevin Wolfe3fc91a2021-05-03 13:05:55 +02004592 goto abort;
Vladimir Sementsov-Ogievskiya2aabf82021-04-28 18:17:57 +03004593 }
4594 }
4595
4596 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
Kevin Wolf1a63a902017-12-06 20:24:44 +01004597 assert(bs_entry->state.bs->quiesce_counter > 0);
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004598 ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
4599 if (ret < 0) {
4600 goto abort;
Jeff Codye971aa12012-09-20 15:13:19 -04004601 }
4602 bs_entry->prepared = true;
4603 }
4604
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004605 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
Kevin Wolf69b736e2019-03-05 17:18:22 +01004606 BDRVReopenState *state = &bs_entry->state;
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004607
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03004608 refresh_list = g_slist_prepend(refresh_list, state->bs);
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004609 if (state->old_backing_bs) {
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03004610 refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs);
Kevin Wolf69b736e2019-03-05 17:18:22 +01004611 }
Alberto Garciaecd30d22021-06-10 15:05:36 +03004612 if (state->old_file_bs) {
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03004613 refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
Alberto Garciaecd30d22021-06-10 15:05:36 +03004614 }
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004615 }
4616
4617 /*
4618 * Note that file-posix driver rely on permission update done during reopen
4619 * (even if no permission changed), because it wants "new" permissions for
4620 * reconfiguring the fd and that's why it does it in raw_check_perm(), not
4621 * in raw_reopen_prepare() which is called with "old" permissions.
4622 */
Kevin Wolf3804e3c2023-09-11 11:46:12 +02004623 bdrv_graph_rdlock_main_loop();
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004624 ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
Kevin Wolf3804e3c2023-09-11 11:46:12 +02004625 bdrv_graph_rdunlock_main_loop();
4626
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004627 if (ret < 0) {
4628 goto abort;
Kevin Wolf69b736e2019-03-05 17:18:22 +01004629 }
4630
Vladimir Sementsov-Ogievskiyfcd6a4f2019-09-27 15:23:48 +03004631 /*
4632 * If we reach this point, we have success and just need to apply the
4633 * changes.
4634 *
4635 * Reverse order is used to comfort qcow2 driver: on commit it need to write
4636 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But
4637 * children are usually goes after parents in reopen-queue, so go from last
4638 * to first element.
Jeff Codye971aa12012-09-20 15:13:19 -04004639 */
Vladimir Sementsov-Ogievskiyfcd6a4f2019-09-27 15:23:48 +03004640 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
Jeff Codye971aa12012-09-20 15:13:19 -04004641 bdrv_reopen_commit(&bs_entry->state);
4642 }
4643
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004644 bdrv_graph_wrlock();
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004645 tran_commit(tran);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004646 bdrv_graph_wrunlock();
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004647
4648 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
4649 BlockDriverState *bs = bs_entry->state.bs;
4650
4651 if (bs->drv->bdrv_reopen_commit_post) {
4652 bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
4653 }
4654 }
4655
Jeff Codye971aa12012-09-20 15:13:19 -04004656 ret = 0;
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004657 goto cleanup;
4658
4659abort:
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004660 bdrv_graph_wrlock();
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004661 tran_abort(tran);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004662 bdrv_graph_wrunlock();
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004663
Vladimir Sementsov-Ogievskiy859443b2019-09-27 15:23:47 +03004664 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
Vladimir Sementsov-Ogievskiy72373e42021-04-28 18:17:58 +03004665 if (bs_entry->prepared) {
4666 bdrv_reopen_abort(&bs_entry->state);
Kevin Wolf69b736e2019-03-05 17:18:22 +01004667 }
Kevin Wolf69b736e2019-03-05 17:18:22 +01004668 }
Peter Krempa17e1e2b2020-02-28 13:44:46 +01004669
Jeff Codye971aa12012-09-20 15:13:19 -04004670cleanup:
Alberto Garciaab5b52282021-07-08 13:47:05 +02004671 bdrv_reopen_queue_free(bs_queue);
Alberto Garcia40840e42016-10-28 10:08:03 +03004672
Jeff Codye971aa12012-09-20 15:13:19 -04004673 return ret;
4674}
4675
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004676int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
4677 Error **errp)
4678{
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004679 BlockReopenQueue *queue;
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004680
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004681 GLOBAL_STATE_CODE();
4682
Kevin Wolf2e117862022-11-18 18:41:01 +01004683 queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
4684
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05004685 return bdrv_reopen_multiple(queue, errp);
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004686}
4687
Alberto Garcia6e1000a2018-11-12 16:00:33 +02004688int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
4689 Error **errp)
4690{
Alberto Garcia6e1000a2018-11-12 16:00:33 +02004691 QDict *opts = qdict_new();
4692
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05004693 GLOBAL_STATE_CODE();
4694
Alberto Garcia6e1000a2018-11-12 16:00:33 +02004695 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
4696
Kevin Wolf6cf42ca2021-07-08 13:47:06 +02004697 return bdrv_reopen(bs, opts, true, errp);
Alberto Garcia6e1000a2018-11-12 16:00:33 +02004698}
4699
Jeff Codye971aa12012-09-20 15:13:19 -04004700/*
Alberto Garciacb828c32019-03-12 18:48:47 +02004701 * Take a BDRVReopenState and check if the value of 'backing' in the
4702 * reopen_state->options QDict is valid or not.
4703 *
4704 * If 'backing' is missing from the QDict then return 0.
4705 *
4706 * If 'backing' contains the node name of the backing file of
4707 * reopen_state->bs then return 0.
4708 *
4709 * If 'backing' contains a different node name (or is null) then check
4710 * whether the current backing file can be replaced with the new one.
4711 * If that's the case then reopen_state->replace_backing_bs is set to
4712 * true and reopen_state->new_backing_bs contains a pointer to the new
4713 * backing BlockDriverState (or NULL).
4714 *
Kevin Wolf5661a002023-09-11 11:46:10 +02004715 * After calling this function, the transaction @tran may only be completed
4716 * while holding a writer lock for the graph.
4717 *
Alberto Garciacb828c32019-03-12 18:48:47 +02004718 * Return 0 on success, otherwise return < 0 and set @errp.
Kevin Wolf4b408662023-06-05 10:57:06 +02004719 *
Kevin Wolf4b408662023-06-05 10:57:06 +02004720 * @reopen_state->bs can move to a different AioContext in this function.
Fiona Ebner2b833592025-05-30 17:10:47 +02004721 *
4722 * All block nodes must be drained before this function is called until after
4723 * the transaction is finalized.
Alberto Garciacb828c32019-03-12 18:48:47 +02004724 */
Kevin Wolfce433d22023-09-29 16:51:43 +02004725static int GRAPH_UNLOCKED
4726bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
4727 bool is_backing, Transaction *tran,
4728 Error **errp)
Alberto Garciacb828c32019-03-12 18:48:47 +02004729{
4730 BlockDriverState *bs = reopen_state->bs;
Alberto Garciaecd30d22021-06-10 15:05:36 +03004731 BlockDriverState *new_child_bs;
Kevin Wolf004915a2023-10-27 17:53:26 +02004732 BlockDriverState *old_child_bs;
4733
Alberto Garciaecd30d22021-06-10 15:05:36 +03004734 const char *child_name = is_backing ? "backing" : "file";
Alberto Garciacb828c32019-03-12 18:48:47 +02004735 QObject *value;
4736 const char *str;
Kevin Wolfce433d22023-09-29 16:51:43 +02004737 bool has_child;
Kevin Wolf4b408662023-06-05 10:57:06 +02004738 int ret;
Alberto Garciacb828c32019-03-12 18:48:47 +02004739
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05004740 GLOBAL_STATE_CODE();
4741
Alberto Garciaecd30d22021-06-10 15:05:36 +03004742 value = qdict_get(reopen_state->options, child_name);
Alberto Garciacb828c32019-03-12 18:48:47 +02004743 if (value == NULL) {
4744 return 0;
4745 }
4746
Kevin Wolf430da832023-10-27 17:53:16 +02004747 bdrv_graph_rdlock_main_loop();
4748
Alberto Garciacb828c32019-03-12 18:48:47 +02004749 switch (qobject_type(value)) {
4750 case QTYPE_QNULL:
Alberto Garciaecd30d22021-06-10 15:05:36 +03004751 assert(is_backing); /* The 'file' option does not allow a null value */
4752 new_child_bs = NULL;
Alberto Garciacb828c32019-03-12 18:48:47 +02004753 break;
4754 case QTYPE_QSTRING:
Markus Armbruster410f44f2020-12-11 18:11:42 +01004755 str = qstring_get_str(qobject_to(QString, value));
Alberto Garciaecd30d22021-06-10 15:05:36 +03004756 new_child_bs = bdrv_lookup_bs(NULL, str, errp);
4757 if (new_child_bs == NULL) {
Kevin Wolf430da832023-10-27 17:53:16 +02004758 ret = -EINVAL;
4759 goto out_rdlock;
Kevin Wolfce433d22023-09-29 16:51:43 +02004760 }
4761
Kevin Wolfce433d22023-09-29 16:51:43 +02004762 has_child = bdrv_recurse_has_child(new_child_bs, bs);
Kevin Wolfce433d22023-09-29 16:51:43 +02004763 if (has_child) {
Alberto Garciaecd30d22021-06-10 15:05:36 +03004764 error_setg(errp, "Making '%s' a %s child of '%s' would create a "
4765 "cycle", str, child_name, bs->node_name);
Kevin Wolf430da832023-10-27 17:53:16 +02004766 ret = -EINVAL;
4767 goto out_rdlock;
Alberto Garciacb828c32019-03-12 18:48:47 +02004768 }
4769 break;
4770 default:
Alberto Garciaecd30d22021-06-10 15:05:36 +03004771 /*
4772 * The options QDict has been flattened, so 'backing' and 'file'
4773 * do not allow any other data type here.
4774 */
Alberto Garciacb828c32019-03-12 18:48:47 +02004775 g_assert_not_reached();
4776 }
4777
Kevin Wolf004915a2023-10-27 17:53:26 +02004778 old_child_bs = is_backing ? child_bs(bs->backing) : child_bs(bs->file);
Alberto Garciaecd30d22021-06-10 15:05:36 +03004779 if (old_child_bs == new_child_bs) {
Kevin Wolf430da832023-10-27 17:53:16 +02004780 ret = 0;
4781 goto out_rdlock;
Alberto Garciaecd30d22021-06-10 15:05:36 +03004782 }
4783
4784 if (old_child_bs) {
4785 if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
Kevin Wolf430da832023-10-27 17:53:16 +02004786 ret = 0;
4787 goto out_rdlock;
Vladimir Sementsov-Ogievskiycbfdb982021-06-10 15:05:33 +03004788 }
4789
Alberto Garciaecd30d22021-06-10 15:05:36 +03004790 if (old_child_bs->implicit) {
4791 error_setg(errp, "Cannot replace implicit %s child of %s",
4792 child_name, bs->node_name);
Kevin Wolf430da832023-10-27 17:53:16 +02004793 ret = -EPERM;
4794 goto out_rdlock;
Vladimir Sementsov-Ogievskiycbfdb982021-06-10 15:05:33 +03004795 }
4796 }
4797
Alberto Garciaecd30d22021-06-10 15:05:36 +03004798 if (bs->drv->is_filter && !old_child_bs) {
Vladimir Sementsov-Ogievskiy25f78d92021-06-10 15:05:34 +03004799 /*
4800 * Filters always have a file or a backing child, so we are trying to
4801 * change wrong child
4802 */
4803 error_setg(errp, "'%s' is a %s filter node that does not support a "
Alberto Garciaecd30d22021-06-10 15:05:36 +03004804 "%s child", bs->node_name, bs->drv->format_name, child_name);
Kevin Wolf430da832023-10-27 17:53:16 +02004805 ret = -EINVAL;
4806 goto out_rdlock;
Max Reitz1d42f482019-06-12 17:24:39 +02004807 }
4808
Alberto Garciaecd30d22021-06-10 15:05:36 +03004809 if (is_backing) {
4810 reopen_state->old_backing_bs = old_child_bs;
4811 } else {
4812 reopen_state->old_file_bs = old_child_bs;
4813 }
4814
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004815 if (old_child_bs) {
4816 bdrv_ref(old_child_bs);
Fiona Ebner2b833592025-05-30 17:10:47 +02004817 assert(old_child_bs->quiesce_counter > 0);
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004818 }
4819
Kevin Wolf430da832023-10-27 17:53:16 +02004820 bdrv_graph_rdunlock_main_loop();
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004821 bdrv_graph_wrlock();
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004822
Kevin Wolf4b408662023-06-05 10:57:06 +02004823 ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
4824 tran, errp);
4825
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05004826 bdrv_graph_wrunlock();
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004827
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004828 if (old_child_bs) {
Kevin Wolf7d4ca9d2023-09-11 11:46:09 +02004829 bdrv_unref(old_child_bs);
4830 }
4831
Kevin Wolf4b408662023-06-05 10:57:06 +02004832 return ret;
Kevin Wolf430da832023-10-27 17:53:16 +02004833
4834out_rdlock:
4835 bdrv_graph_rdunlock_main_loop();
4836 return ret;
Alberto Garciacb828c32019-03-12 18:48:47 +02004837}
4838
4839/*
Jeff Codye971aa12012-09-20 15:13:19 -04004840 * Prepares a BlockDriverState for reopen. All changes are staged in the
4841 * 'opaque' field of the BDRVReopenState, which is used and allocated by
4842 * the block driver layer .bdrv_reopen_prepare()
4843 *
4844 * bs is the BlockDriverState to reopen
4845 * flags are the new open flags
4846 * queue is the reopen queue
4847 *
4848 * Returns 0 on success, non-zero on error. On error errp will be set
4849 * as well.
4850 *
4851 * On failure, bdrv_reopen_abort() will be called to clean up any data.
4852 * It is the responsibility of the caller to then call the abort() or
4853 * commit() for any other BDS that have been left in a prepare() state
4854 *
Kevin Wolf5661a002023-09-11 11:46:10 +02004855 * After calling this function, the transaction @change_child_tran may only be
4856 * completed while holding a writer lock for the graph.
Fiona Ebner2b833592025-05-30 17:10:47 +02004857 *
4858 * All block nodes must be drained before this function is called until after
4859 * the transaction is finalized.
Jeff Codye971aa12012-09-20 15:13:19 -04004860 */
Kevin Wolfce433d22023-09-29 16:51:43 +02004861static int GRAPH_UNLOCKED
4862bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
4863 Transaction *change_child_tran, Error **errp)
Jeff Codye971aa12012-09-20 15:13:19 -04004864{
4865 int ret = -1;
Alberto Garciae6d79c42018-11-12 16:00:47 +02004866 int old_flags;
Jeff Codye971aa12012-09-20 15:13:19 -04004867 Error *local_err = NULL;
4868 BlockDriver *drv;
Kevin Wolfccf9dc02015-05-08 17:24:56 +02004869 QemuOpts *opts;
Alberto Garcia4c8350f2018-06-29 14:37:02 +03004870 QDict *orig_reopen_opts;
Alberto Garcia593b3072018-09-06 12:37:08 +03004871 char *discard = NULL;
Jeff Cody3d8ce172017-04-07 16:55:30 -04004872 bool read_only;
Max Reitz9ad08c42018-11-16 17:45:24 +01004873 bool drv_prepared = false;
Jeff Codye971aa12012-09-20 15:13:19 -04004874
4875 assert(reopen_state != NULL);
4876 assert(reopen_state->bs->drv != NULL);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05004877 GLOBAL_STATE_CODE();
Jeff Codye971aa12012-09-20 15:13:19 -04004878 drv = reopen_state->bs->drv;
4879
Alberto Garcia4c8350f2018-06-29 14:37:02 +03004880 /* This function and each driver's bdrv_reopen_prepare() remove
4881 * entries from reopen_state->options as they are processed, so
4882 * we need to make a copy of the original QDict. */
4883 orig_reopen_opts = qdict_clone_shallow(reopen_state->options);
4884
Kevin Wolfccf9dc02015-05-08 17:24:56 +02004885 /* Process generic block layer options */
4886 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
Markus Armbrusteraf175e82020-07-07 18:06:03 +02004887 if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
Kevin Wolfccf9dc02015-05-08 17:24:56 +02004888 ret = -EINVAL;
4889 goto error;
4890 }
4891
Alberto Garciae6d79c42018-11-12 16:00:47 +02004892 /* This was already called in bdrv_reopen_queue_child() so the flags
4893 * are up-to-date. This time we simply want to remove the options from
4894 * QemuOpts in order to indicate that they have been processed. */
4895 old_flags = reopen_state->flags;
Kevin Wolf91a097e2015-05-08 17:49:53 +02004896 update_flags_from_options(&reopen_state->flags, opts);
Alberto Garciae6d79c42018-11-12 16:00:47 +02004897 assert(old_flags == reopen_state->flags);
Kevin Wolf91a097e2015-05-08 17:49:53 +02004898
Alberto Garcia415bbca2018-10-03 13:23:13 +03004899 discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
Alberto Garcia593b3072018-09-06 12:37:08 +03004900 if (discard != NULL) {
4901 if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
4902 error_setg(errp, "Invalid discard option");
4903 ret = -EINVAL;
4904 goto error;
4905 }
4906 }
4907
Alberto Garcia543770b2018-09-06 12:37:09 +03004908 reopen_state->detect_zeroes =
4909 bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
4910 if (local_err) {
4911 error_propagate(errp, local_err);
4912 ret = -EINVAL;
4913 goto error;
4914 }
4915
Alberto Garcia57f9db92018-09-06 12:37:06 +03004916 /* All other options (including node-name and driver) must be unchanged.
4917 * Put them back into the QDict, so that they are checked at the end
4918 * of this function. */
4919 qemu_opts_to_qdict(opts, reopen_state->options);
Kevin Wolfccf9dc02015-05-08 17:24:56 +02004920
Jeff Cody3d8ce172017-04-07 16:55:30 -04004921 /* If we are to stay read-only, do not allow permission change
4922 * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
4923 * not set, or if the BDS still has copy_on_read enabled */
4924 read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf4026f1c2023-09-29 16:51:47 +02004925
4926 bdrv_graph_rdlock_main_loop();
Kevin Wolf54a32bf2017-08-03 17:02:58 +02004927 ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
Kevin Wolf4026f1c2023-09-29 16:51:47 +02004928 bdrv_graph_rdunlock_main_loop();
Jeff Cody3d8ce172017-04-07 16:55:30 -04004929 if (local_err) {
4930 error_propagate(errp, local_err);
Jeff Codye971aa12012-09-20 15:13:19 -04004931 goto error;
4932 }
4933
Jeff Codye971aa12012-09-20 15:13:19 -04004934 if (drv->bdrv_reopen_prepare) {
Alberto Garciafaf116b2019-03-12 18:48:49 +02004935 /*
4936 * If a driver-specific option is missing, it means that we
4937 * should reset it to its default value.
4938 * But not all options allow that, so we need to check it first.
4939 */
4940 ret = bdrv_reset_options_allowed(reopen_state->bs,
4941 reopen_state->options, errp);
4942 if (ret) {
4943 goto error;
4944 }
4945
Jeff Codye971aa12012-09-20 15:13:19 -04004946 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
4947 if (ret) {
4948 if (local_err != NULL) {
4949 error_propagate(errp, local_err);
4950 } else {
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02004951 bdrv_graph_rdlock_main_loop();
Max Reitzf30c66b2019-02-01 20:29:05 +01004952 bdrv_refresh_filename(reopen_state->bs);
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02004953 bdrv_graph_rdunlock_main_loop();
Luiz Capitulinod8b68952013-06-10 11:29:27 -04004954 error_setg(errp, "failed while preparing to reopen image '%s'",
4955 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04004956 }
4957 goto error;
4958 }
4959 } else {
4960 /* It is currently mandatory to have a bdrv_reopen_prepare()
4961 * handler for each supported drv. */
Kevin Wolf4026f1c2023-09-29 16:51:47 +02004962 bdrv_graph_rdlock_main_loop();
Alberto Garcia81e5f782015-04-08 12:29:19 +03004963 error_setg(errp, "Block format '%s' used by node '%s' "
4964 "does not support reopening files", drv->format_name,
4965 bdrv_get_device_or_node_name(reopen_state->bs));
Kevin Wolf4026f1c2023-09-29 16:51:47 +02004966 bdrv_graph_rdunlock_main_loop();
Jeff Codye971aa12012-09-20 15:13:19 -04004967 ret = -1;
4968 goto error;
4969 }
4970
Max Reitz9ad08c42018-11-16 17:45:24 +01004971 drv_prepared = true;
4972
Alberto Garciabacd9b82019-03-12 18:48:46 +02004973 /*
4974 * We must provide the 'backing' option if the BDS has a backing
4975 * file or if the image file has a backing file name as part of
4976 * its metadata. Otherwise the 'backing' option can be omitted.
4977 */
Kevin Wolf004915a2023-10-27 17:53:26 +02004978 bdrv_graph_rdlock_main_loop();
Alberto Garciabacd9b82019-03-12 18:48:46 +02004979 if (drv->supports_backing && reopen_state->backing_missing &&
Max Reitz1d42f482019-06-12 17:24:39 +02004980 (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
Alberto Garcia85466322019-03-12 18:48:45 +02004981 error_setg(errp, "backing is missing for '%s'",
4982 reopen_state->bs->node_name);
Kevin Wolf004915a2023-10-27 17:53:26 +02004983 bdrv_graph_rdunlock_main_loop();
Alberto Garcia85466322019-03-12 18:48:45 +02004984 ret = -EINVAL;
4985 goto error;
4986 }
Kevin Wolf004915a2023-10-27 17:53:26 +02004987 bdrv_graph_rdunlock_main_loop();
Alberto Garcia85466322019-03-12 18:48:45 +02004988
Alberto Garciacb828c32019-03-12 18:48:47 +02004989 /*
4990 * Allow changing the 'backing' option. The new value can be
4991 * either a reference to an existing node (using its node name)
4992 * or NULL to simply detach the current backing file.
4993 */
Alberto Garciaecd30d22021-06-10 15:05:36 +03004994 ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
4995 change_child_tran, errp);
Alberto Garciacb828c32019-03-12 18:48:47 +02004996 if (ret < 0) {
4997 goto error;
4998 }
4999 qdict_del(reopen_state->options, "backing");
5000
Alberto Garciaecd30d22021-06-10 15:05:36 +03005001 /* Allow changing the 'file' option. In this case NULL is not allowed */
5002 ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
5003 change_child_tran, errp);
5004 if (ret < 0) {
5005 goto error;
5006 }
5007 qdict_del(reopen_state->options, "file");
5008
Kevin Wolf4d2cb092015-04-10 17:50:50 +02005009 /* Options that are not handled are only okay if they are unchanged
5010 * compared to the old state. It is expected that some options are only
5011 * used for the initial open, but not reopen (e.g. filename) */
5012 if (qdict_size(reopen_state->options)) {
5013 const QDictEntry *entry = qdict_first(reopen_state->options);
5014
Kevin Wolfce433d22023-09-29 16:51:43 +02005015 GRAPH_RDLOCK_GUARD_MAINLOOP();
5016
Kevin Wolf4d2cb092015-04-10 17:50:50 +02005017 do {
Max Reitz54fd1b02017-11-14 19:01:26 +01005018 QObject *new = entry->value;
5019 QObject *old = qdict_get(reopen_state->bs->options, entry->key);
Kevin Wolf4d2cb092015-04-10 17:50:50 +02005020
Alberto Garciadb905282018-09-06 12:37:05 +03005021 /* Allow child references (child_name=node_name) as long as they
5022 * point to the current child (i.e. everything stays the same). */
5023 if (qobject_type(new) == QTYPE_QSTRING) {
5024 BdrvChild *child;
5025 QLIST_FOREACH(child, &reopen_state->bs->children, next) {
5026 if (!strcmp(child->name, entry->key)) {
5027 break;
5028 }
5029 }
5030
5031 if (child) {
Markus Armbruster410f44f2020-12-11 18:11:42 +01005032 if (!strcmp(child->bs->node_name,
5033 qstring_get_str(qobject_to(QString, new)))) {
Alberto Garciadb905282018-09-06 12:37:05 +03005034 continue; /* Found child with this name, skip option */
5035 }
5036 }
5037 }
5038
Max Reitz54fd1b02017-11-14 19:01:26 +01005039 /*
5040 * TODO: When using -drive to specify blockdev options, all values
5041 * will be strings; however, when using -blockdev, blockdev-add or
5042 * filenames using the json:{} pseudo-protocol, they will be
5043 * correctly typed.
5044 * In contrast, reopening options are (currently) always strings
5045 * (because you can only specify them through qemu-io; all other
5046 * callers do not specify any options).
5047 * Therefore, when using anything other than -drive to create a BDS,
5048 * this cannot detect non-string options as unchanged, because
5049 * qobject_is_equal() always returns false for objects of different
5050 * type. In the future, this should be remedied by correctly typing
5051 * all options. For now, this is not too big of an issue because
5052 * the user can simply omit options which cannot be changed anyway,
5053 * so they will stay unchanged.
5054 */
5055 if (!qobject_is_equal(new, old)) {
Kevin Wolf4d2cb092015-04-10 17:50:50 +02005056 error_setg(errp, "Cannot change the option '%s'", entry->key);
5057 ret = -EINVAL;
5058 goto error;
5059 }
5060 } while ((entry = qdict_next(reopen_state->options, entry)));
5061 }
5062
Jeff Codye971aa12012-09-20 15:13:19 -04005063 ret = 0;
5064
Alberto Garcia4c8350f2018-06-29 14:37:02 +03005065 /* Restore the original reopen_state->options QDict */
5066 qobject_unref(reopen_state->options);
5067 reopen_state->options = qobject_ref(orig_reopen_opts);
5068
Jeff Codye971aa12012-09-20 15:13:19 -04005069error:
Max Reitz9ad08c42018-11-16 17:45:24 +01005070 if (ret < 0 && drv_prepared) {
5071 /* drv->bdrv_reopen_prepare() has succeeded, so we need to
5072 * call drv->bdrv_reopen_abort() before signaling an error
5073 * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
5074 * when the respective bdrv_reopen_prepare() has failed) */
5075 if (drv->bdrv_reopen_abort) {
5076 drv->bdrv_reopen_abort(reopen_state);
5077 }
5078 }
Kevin Wolfccf9dc02015-05-08 17:24:56 +02005079 qemu_opts_del(opts);
Alberto Garcia4c8350f2018-06-29 14:37:02 +03005080 qobject_unref(orig_reopen_opts);
Alberto Garcia593b3072018-09-06 12:37:08 +03005081 g_free(discard);
Jeff Codye971aa12012-09-20 15:13:19 -04005082 return ret;
5083}
5084
5085/*
5086 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
5087 * makes them final by swapping the staging BlockDriverState contents into
5088 * the active BlockDriverState contents.
5089 */
Kevin Wolfce433d22023-09-29 16:51:43 +02005090static void GRAPH_UNLOCKED bdrv_reopen_commit(BDRVReopenState *reopen_state)
Jeff Codye971aa12012-09-20 15:13:19 -04005091{
5092 BlockDriver *drv;
Vladimir Sementsov-Ogievskiy50bf65b2017-06-28 15:05:12 +03005093 BlockDriverState *bs;
Alberto Garcia50196d72018-09-06 12:37:03 +03005094 BdrvChild *child;
Jeff Codye971aa12012-09-20 15:13:19 -04005095
5096 assert(reopen_state != NULL);
Vladimir Sementsov-Ogievskiy50bf65b2017-06-28 15:05:12 +03005097 bs = reopen_state->bs;
5098 drv = bs->drv;
Jeff Codye971aa12012-09-20 15:13:19 -04005099 assert(drv != NULL);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05005100 GLOBAL_STATE_CODE();
Jeff Codye971aa12012-09-20 15:13:19 -04005101
5102 /* If there are any driver level actions to take */
5103 if (drv->bdrv_reopen_commit) {
5104 drv->bdrv_reopen_commit(reopen_state);
5105 }
5106
Kevin Wolfce433d22023-09-29 16:51:43 +02005107 GRAPH_RDLOCK_GUARD_MAINLOOP();
5108
Jeff Codye971aa12012-09-20 15:13:19 -04005109 /* set BDS specific flags now */
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02005110 qobject_unref(bs->explicit_options);
Alberto Garcia4c8350f2018-06-29 14:37:02 +03005111 qobject_unref(bs->options);
Alberto Garciaab5b52282021-07-08 13:47:05 +02005112 qobject_ref(reopen_state->explicit_options);
5113 qobject_ref(reopen_state->options);
Kevin Wolf145f5982015-05-08 16:15:03 +02005114
Vladimir Sementsov-Ogievskiy50bf65b2017-06-28 15:05:12 +03005115 bs->explicit_options = reopen_state->explicit_options;
Alberto Garcia4c8350f2018-06-29 14:37:02 +03005116 bs->options = reopen_state->options;
Vladimir Sementsov-Ogievskiy50bf65b2017-06-28 15:05:12 +03005117 bs->open_flags = reopen_state->flags;
Alberto Garcia543770b2018-09-06 12:37:09 +03005118 bs->detect_zeroes = reopen_state->detect_zeroes;
Kevin Wolf355ef4a2013-12-11 20:14:09 +01005119
Alberto Garcia50196d72018-09-06 12:37:03 +03005120 /* Remove child references from bs->options and bs->explicit_options.
5121 * Child options were already removed in bdrv_reopen_queue_child() */
5122 QLIST_FOREACH(child, &bs->children, next) {
5123 qdict_del(bs->explicit_options, child->name);
5124 qdict_del(bs->options, child->name);
5125 }
Vladimir Sementsov-Ogievskiy3d0e8742021-06-10 15:05:35 +03005126 /* backing is probably removed, so it's not handled by previous loop */
5127 qdict_del(bs->explicit_options, "backing");
5128 qdict_del(bs->options, "backing");
5129
Vladimir Sementsov-Ogievskiy1e4c7972021-04-28 18:17:55 +03005130 bdrv_refresh_limits(bs, NULL, NULL);
Paolo Bonzini439cc332023-04-07 17:32:58 +02005131 bdrv_refresh_total_sectors(bs, bs->total_sectors);
Jeff Codye971aa12012-09-20 15:13:19 -04005132}
5133
5134/*
5135 * Abort the reopen, and delete and free the staged changes in
5136 * reopen_state
5137 */
Kevin Wolfce433d22023-09-29 16:51:43 +02005138static void GRAPH_UNLOCKED bdrv_reopen_abort(BDRVReopenState *reopen_state)
Jeff Codye971aa12012-09-20 15:13:19 -04005139{
5140 BlockDriver *drv;
5141
5142 assert(reopen_state != NULL);
5143 drv = reopen_state->bs->drv;
5144 assert(drv != NULL);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05005145 GLOBAL_STATE_CODE();
Jeff Codye971aa12012-09-20 15:13:19 -04005146
5147 if (drv->bdrv_reopen_abort) {
5148 drv->bdrv_reopen_abort(reopen_state);
5149 }
5150}
5151
5152
Max Reitz64dff522016-01-29 16:36:10 +01005153static void bdrv_close(BlockDriverState *bs)
bellardfc01f7e2003-06-30 10:03:06 +00005154{
Max Reitz33384422014-06-20 21:57:33 +02005155 BdrvAioNotifier *ban, *ban_next;
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005156 BdrvChild *child, *next;
Max Reitz33384422014-06-20 21:57:33 +02005157
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005158 GLOBAL_STATE_CODE();
Max Reitz30f55fb2016-05-17 16:41:32 +02005159 assert(!bs->refcnt);
Alberto Garcia99b7e772015-09-25 16:41:44 +03005160
Paolo Bonzinifc272912015-12-23 11:48:24 +01005161 bdrv_drained_begin(bs); /* complete I/O */
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02005162 bdrv_flush(bs);
Fam Zheng53ec73e2015-05-29 18:53:14 +08005163 bdrv_drain(bs); /* in case flush left pending I/O */
Paolo Bonzinifc272912015-12-23 11:48:24 +01005164
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02005165 if (bs->drv) {
Vladimir Sementsov-Ogievskiy3c005292018-08-14 15:43:19 +03005166 if (bs->drv->bdrv_close) {
Max Reitz7b99a262019-06-12 16:07:11 +02005167 /* Must unfreeze all children, so bdrv_unref_child() works */
Vladimir Sementsov-Ogievskiy3c005292018-08-14 15:43:19 +03005168 bs->drv->bdrv_close(bs);
5169 }
Kevin Wolf9a4f4c32015-06-16 14:19:22 +02005170 bs->drv = NULL;
bellardb3380822004-03-14 21:38:54 +00005171 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08005172
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005173 bdrv_graph_wrlock();
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005174 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
Alberto Garciadd4118c2019-05-13 16:46:17 +03005175 bdrv_unref_child(bs, child);
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005176 }
5177
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03005178 assert(!bs->backing);
5179 assert(!bs->file);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005180 bdrv_graph_wrunlock();
Kevin Wolf004915a2023-10-27 17:53:26 +02005181
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005182 g_free(bs->opaque);
5183 bs->opaque = NULL;
Stefan Hajnoczid73415a2020-09-23 11:56:46 +01005184 qatomic_set(&bs->copy_on_read, 0);
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005185 bs->backing_file[0] = '\0';
5186 bs->backing_format[0] = '\0';
5187 bs->total_sectors = 0;
5188 bs->encrypted = false;
5189 bs->sg = false;
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02005190 qobject_unref(bs->options);
5191 qobject_unref(bs->explicit_options);
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005192 bs->options = NULL;
5193 bs->explicit_options = NULL;
Marc-André Lureaucb3e7f02018-04-19 17:01:43 +02005194 qobject_unref(bs->full_open_options);
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005195 bs->full_open_options = NULL;
Hanna Reitz0bc329f2021-08-12 10:41:44 +02005196 g_free(bs->block_status_cache);
5197 bs->block_status_cache = NULL;
Alberto Garcia50a3efb2017-11-06 16:53:45 +02005198
Vladimir Sementsov-Ogievskiycca43ae2017-06-28 15:05:16 +03005199 bdrv_release_named_dirty_bitmaps(bs);
5200 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
5201
Max Reitz33384422014-06-20 21:57:33 +02005202 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5203 g_free(ban);
5204 }
5205 QLIST_INIT(&bs->aio_notifiers);
Paolo Bonzinifc272912015-12-23 11:48:24 +01005206 bdrv_drained_end(bs);
Greg Kurz1a6d3bd2020-10-23 17:01:10 +02005207
5208 /*
5209 * If we're still inside some bdrv_drain_all_begin()/end() sections, end
5210 * them now since this BDS won't exist anymore when bdrv_drain_all_end()
5211 * gets called.
5212 */
5213 if (bs->quiesce_counter) {
5214 bdrv_drain_all_end_quiesce(bs);
5215 }
bellardb3380822004-03-14 21:38:54 +00005216}
5217
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09005218void bdrv_close_all(void)
5219{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005220 GLOBAL_STATE_CODE();
Emanuele Giuseppe Esposito880eeec2022-09-26 05:32:04 -04005221 assert(job_next(NULL) == NULL);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09005222
Max Reitzca9bd242016-01-29 16:36:14 +01005223 /* Drop references from requests still in flight, such as canceled block
5224 * jobs whose AIO context has not been polled yet */
5225 bdrv_drain_all();
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005226
Max Reitzca9bd242016-01-29 16:36:14 +01005227 blk_remove_all_bs();
5228 blockdev_close_all_bdrv_states();
5229
Kevin Wolfa1a2af02016-04-08 18:26:37 +02005230 assert(QTAILQ_EMPTY(&all_bdrv_states));
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09005231}
5232
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005233static bool GRAPH_RDLOCK should_update_child(BdrvChild *c, BlockDriverState *to)
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005234{
Vladimir Sementsov-Ogievskiy2f30b7c2019-02-23 22:20:39 +03005235 GQueue *queue;
5236 GHashTable *found;
5237 bool ret;
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005238
Max Reitzbd86fb92020-05-13 13:05:13 +02005239 if (c->klass->stay_at_node) {
Kevin Wolfd0ac0382017-03-01 17:30:41 +01005240 return false;
5241 }
5242
Max Reitzec9f10f2018-06-13 20:18:15 +02005243 /* If the child @c belongs to the BDS @to, replacing the current
5244 * c->bs by @to would mean to create a loop.
5245 *
5246 * Such a case occurs when appending a BDS to a backing chain.
5247 * For instance, imagine the following chain:
5248 *
5249 * guest device -> node A -> further backing chain...
5250 *
5251 * Now we create a new BDS B which we want to put on top of this
5252 * chain, so we first attach A as its backing node:
5253 *
5254 * node B
5255 * |
5256 * v
5257 * guest device -> node A -> further backing chain...
5258 *
5259 * Finally we want to replace A by B. When doing that, we want to
5260 * replace all pointers to A by pointers to B -- except for the
5261 * pointer from B because (1) that would create a loop, and (2)
5262 * that pointer should simply stay intact:
5263 *
5264 * guest device -> node B
5265 * |
5266 * v
5267 * node A -> further backing chain...
5268 *
5269 * In general, when replacing a node A (c->bs) by a node B (@to),
5270 * if A is a child of B, that means we cannot replace A by B there
5271 * because that would create a loop. Silently detaching A from B
5272 * is also not really an option. So overall just leaving A in
Vladimir Sementsov-Ogievskiy2f30b7c2019-02-23 22:20:39 +03005273 * place there is the most sensible choice.
5274 *
5275 * We would also create a loop in any cases where @c is only
5276 * indirectly referenced by @to. Prevent this by returning false
5277 * if @c is found (by breadth-first search) anywhere in the whole
5278 * subtree of @to.
5279 */
5280
5281 ret = true;
5282 found = g_hash_table_new(NULL, NULL);
5283 g_hash_table_add(found, to);
5284 queue = g_queue_new();
5285 g_queue_push_tail(queue, to);
5286
5287 while (!g_queue_is_empty(queue)) {
5288 BlockDriverState *v = g_queue_pop_head(queue);
5289 BdrvChild *c2;
5290
5291 QLIST_FOREACH(c2, &v->children, next) {
5292 if (c2 == c) {
5293 ret = false;
5294 break;
5295 }
5296
5297 if (g_hash_table_contains(found, c2->bs)) {
5298 continue;
5299 }
5300
5301 g_queue_push_tail(queue, c2->bs);
5302 g_hash_table_add(found, c2->bs);
Kevin Wolfd0ac0382017-03-01 17:30:41 +01005303 }
5304 }
5305
Vladimir Sementsov-Ogievskiy2f30b7c2019-02-23 22:20:39 +03005306 g_queue_free(queue);
5307 g_hash_table_destroy(found);
5308
5309 return ret;
Kevin Wolfd0ac0382017-03-01 17:30:41 +01005310}
5311
Vladimir Sementsov-Ogievskiy57f08942022-07-26 23:11:34 +03005312static void bdrv_remove_child_commit(void *opaque)
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005313{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05005314 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03005315 bdrv_child_free(opaque);
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005316}
5317
Vladimir Sementsov-Ogievskiy57f08942022-07-26 23:11:34 +03005318static TransactionActionDrv bdrv_remove_child_drv = {
5319 .commit = bdrv_remove_child_commit,
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005320};
5321
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005322/*
5323 * Function doesn't update permissions, caller is responsible for this.
5324 *
5325 * @child->bs (if non-NULL) must be drained.
Kevin Wolf5661a002023-09-11 11:46:10 +02005326 *
5327 * After calling this function, the transaction @tran may only be completed
5328 * while holding a writer lock for the graph.
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005329 */
5330static void GRAPH_WRLOCK bdrv_remove_child(BdrvChild *child, Transaction *tran)
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005331{
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005332 if (!child) {
5333 return;
5334 }
5335
5336 if (child->bs) {
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005337 assert(child->quiesced_parent);
Vladimir Sementsov-Ogievskiya2c37a32022-07-26 23:11:30 +03005338 bdrv_replace_child_tran(child, NULL, tran);
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005339 }
5340
Vladimir Sementsov-Ogievskiy57f08942022-07-26 23:11:34 +03005341 tran_add(tran, &bdrv_remove_child_drv, child);
Vladimir Sementsov-Ogievskiy46541ee2021-04-28 18:17:50 +03005342}
5343
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005344/*
5345 * Both @from and @to (if non-NULL) must be drained. @to must be kept drained
5346 * until the transaction is completed.
Kevin Wolf5661a002023-09-11 11:46:10 +02005347 *
5348 * After calling this function, the transaction @tran may only be completed
5349 * while holding a writer lock for the graph.
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005350 */
5351static int GRAPH_WRLOCK
5352bdrv_replace_node_noperm(BlockDriverState *from,
5353 BlockDriverState *to,
5354 bool auto_skip, Transaction *tran,
5355 Error **errp)
Vladimir Sementsov-Ogievskiy117caba2021-04-28 18:17:48 +03005356{
5357 BdrvChild *c, *next;
5358
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05005359 GLOBAL_STATE_CODE();
Hanna Reitz82b54cf2021-11-15 15:54:04 +01005360
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005361 assert(from->quiesce_counter);
5362 assert(to->quiesce_counter);
Kevin Wolf23987472022-11-18 18:41:09 +01005363
Vladimir Sementsov-Ogievskiy117caba2021-04-28 18:17:48 +03005364 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
5365 assert(c->bs == from);
5366 if (!should_update_child(c, to)) {
5367 if (auto_skip) {
5368 continue;
5369 }
5370 error_setg(errp, "Should not change '%s' link to '%s'",
5371 c->name, from->node_name);
5372 return -EINVAL;
5373 }
5374 if (c->frozen) {
5375 error_setg(errp, "Cannot change '%s' link to '%s'",
5376 c->name, from->node_name);
5377 return -EPERM;
5378 }
Vladimir Sementsov-Ogievskiy0f0b1e22022-07-26 23:11:29 +03005379 bdrv_replace_child_tran(c, to, tran);
Vladimir Sementsov-Ogievskiy117caba2021-04-28 18:17:48 +03005380 }
5381
5382 return 0;
5383}
5384
Vladimir Sementsov-Ogievskiy313274b2020-11-06 15:42:36 +03005385/*
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005386 * Switch all parents of @from to point to @to instead. @from and @to must be in
5387 * the same AioContext and both must be drained.
5388 *
Vladimir Sementsov-Ogievskiy313274b2020-11-06 15:42:36 +03005389 * With auto_skip=true bdrv_replace_node_common skips updating from parents
5390 * if it creates a parent-child relation loop or if parent is block-job.
5391 *
5392 * With auto_skip=false the error is returned if from has a parent which should
5393 * not be updated.
Vladimir Sementsov-Ogievskiy3108a152021-04-28 18:17:51 +03005394 *
5395 * With @detach_subchain=true @to must be in a backing chain of @from. In this
5396 * case backing link of the cow-parent of @to is removed.
Vladimir Sementsov-Ogievskiy313274b2020-11-06 15:42:36 +03005397 */
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005398static int GRAPH_WRLOCK
5399bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
5400 bool auto_skip, bool detach_subchain, Error **errp)
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005401{
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005402 Transaction *tran = tran_new();
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005403 g_autoptr(GSList) refresh_list = NULL;
Miroslav Rezanina2d369d62021-05-05 03:59:03 -04005404 BlockDriverState *to_cow_parent = NULL;
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005405 int ret;
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005406
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05005407 GLOBAL_STATE_CODE();
Hanna Reitz82b54cf2021-11-15 15:54:04 +01005408
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005409 assert(from->quiesce_counter);
5410 assert(to->quiesce_counter);
Kevin Wolf30dd65f2020-03-10 12:38:29 +01005411 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
Kevin Wolff871abd2019-05-21 19:00:25 +02005412
Kevin Wolf372b69f2023-10-27 17:53:15 +02005413 if (detach_subchain) {
5414 assert(bdrv_chain_contains(from, to));
5415 assert(from != to);
5416 for (to_cow_parent = from;
5417 bdrv_filter_or_cow_bs(to_cow_parent) != to;
5418 to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
5419 {
5420 ;
5421 }
5422 }
5423
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005424 /*
5425 * Do the replacement without permission update.
5426 * Replacement may influence the permissions, we should calculate new
5427 * permissions based on new graph. If we fail, we'll roll-back the
5428 * replacement.
5429 */
Vladimir Sementsov-Ogievskiy117caba2021-04-28 18:17:48 +03005430 ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
5431 if (ret < 0) {
5432 goto out;
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005433 }
5434
Vladimir Sementsov-Ogievskiy3108a152021-04-28 18:17:51 +03005435 if (detach_subchain) {
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005436 /* to_cow_parent is already drained because from is drained */
Vladimir Sementsov-Ogievskiyf38eaec2022-11-07 19:35:56 +03005437 bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
Vladimir Sementsov-Ogievskiy3108a152021-04-28 18:17:51 +03005438 }
5439
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03005440 refresh_list = g_slist_prepend(refresh_list, to);
5441 refresh_list = g_slist_prepend(refresh_list, from);
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005442
5443 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005444 if (ret < 0) {
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005445 goto out;
5446 }
5447
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03005448 ret = 0;
5449
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005450out:
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005451 tran_finalize(tran, ret);
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005452 return ret;
5453}
Vladimir Sementsov-Ogievskiy3bb0e292021-04-28 18:17:45 +03005454
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005455int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
5456 Error **errp)
5457{
Kevin Wolfccd6a372023-10-27 17:53:25 +02005458 return bdrv_replace_node_common(from, to, true, false, errp);
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005459}
5460
Vladimir Sementsov-Ogievskiy3108a152021-04-28 18:17:51 +03005461int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
5462{
Kevin Wolf372b69f2023-10-27 17:53:15 +02005463 BlockDriverState *child_bs;
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005464 int ret;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005465
Kevin Wolf372b69f2023-10-27 17:53:15 +02005466 GLOBAL_STATE_CODE();
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005467
Kevin Wolf372b69f2023-10-27 17:53:15 +02005468 bdrv_graph_rdlock_main_loop();
5469 child_bs = bdrv_filter_or_cow_bs(bs);
5470 bdrv_graph_rdunlock_main_loop();
5471
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005472 bdrv_drained_begin(child_bs);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005473 bdrv_graph_wrlock();
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005474 ret = bdrv_replace_node_common(bs, child_bs, true, true, errp);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005475 bdrv_graph_wrunlock();
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005476 bdrv_drained_end(child_bs);
5477
5478 return ret;
Vladimir Sementsov-Ogievskiy313274b2020-11-06 15:42:36 +03005479}
5480
Jeff Cody8802d1f2012-02-28 15:54:06 -05005481/*
5482 * Add new bs contents at the top of an image chain while the chain is
5483 * live, while keeping required fields on the top layer.
5484 *
5485 * This will modify the BlockDriverState fields, and swap contents
5486 * between bs_new and bs_top. Both bs_new and bs_top are modified.
5487 *
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005488 * bs_new must not be attached to a BlockBackend and must not have backing
5489 * child.
Jeff Codyf6801b82012-03-27 16:30:19 -04005490 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05005491 * This function does not create any image files.
5492 */
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03005493int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
5494 Error **errp)
Jeff Cody8802d1f2012-02-28 15:54:06 -05005495{
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005496 int ret;
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03005497 BdrvChild *child;
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005498 Transaction *tran = tran_new();
5499
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005500 GLOBAL_STATE_CODE();
5501
Kevin Wolf004915a2023-10-27 17:53:26 +02005502 bdrv_graph_rdlock_main_loop();
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005503 assert(!bs_new->backing);
Kevin Wolf004915a2023-10-27 17:53:26 +02005504 bdrv_graph_rdunlock_main_loop();
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005505
Fiona Ebner2b833592025-05-30 17:10:47 +02005506 bdrv_drain_all_begin();
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005507 bdrv_graph_wrlock();
Stefano Garzarella60d90bf2023-02-14 18:16:21 +01005508
Vladimir Sementsov-Ogievskiy5bb047472022-07-26 23:11:32 +03005509 child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
5510 &child_of_bds, bdrv_backing_role(bs_new),
5511 tran, errp);
5512 if (!child) {
5513 ret = -EINVAL;
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005514 goto out;
Kevin Wolfb2c28322017-02-20 12:46:42 +01005515 }
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005516
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005517 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
Vladimir Sementsov-Ogievskiya1e708f2021-02-02 15:49:43 +03005518 if (ret < 0) {
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005519 goto out;
Kevin Wolf234ac1a2017-03-02 18:43:00 +01005520 }
Kevin Wolfdd62f1c2015-06-18 14:09:57 +02005521
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03005522 ret = bdrv_refresh_perms(bs_new, tran, errp);
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005523out:
5524 tran_finalize(tran, ret);
5525
Vladimir Sementsov-Ogievskiy1e4c7972021-04-28 18:17:55 +03005526 bdrv_refresh_limits(bs_top, NULL, NULL);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005527 bdrv_graph_wrunlock();
Fiona Ebner2b833592025-05-30 17:10:47 +02005528 bdrv_drain_all_end();
Kevin Wolf2f64e1f2023-09-11 11:46:08 +02005529
Vladimir Sementsov-Ogievskiy2272edc2021-04-28 18:17:49 +03005530 return ret;
Jeff Cody8802d1f2012-02-28 15:54:06 -05005531}
5532
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005533/* Not for empty child */
5534int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
5535 Error **errp)
5536{
5537 int ret;
5538 Transaction *tran = tran_new();
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005539 g_autoptr(GSList) refresh_list = NULL;
5540 BlockDriverState *old_bs = child->bs;
5541
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005542 GLOBAL_STATE_CODE();
5543
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005544 bdrv_ref(old_bs);
5545 bdrv_drained_begin(old_bs);
5546 bdrv_drained_begin(new_bs);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005547 bdrv_graph_wrlock();
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005548
Vladimir Sementsov-Ogievskiy0f0b1e22022-07-26 23:11:29 +03005549 bdrv_replace_child_tran(child, new_bs, tran);
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005550
Vladimir Sementsov-Ogievskiyfb0ff4d2022-11-07 19:35:58 +03005551 refresh_list = g_slist_prepend(refresh_list, old_bs);
5552 refresh_list = g_slist_prepend(refresh_list, new_bs);
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005553
5554 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5555
5556 tran_finalize(tran, ret);
5557
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005558 bdrv_graph_wrunlock();
Vladimir Sementsov-Ogievskiybd8f4c42021-08-24 11:38:23 +03005559 bdrv_drained_end(old_bs);
5560 bdrv_drained_end(new_bs);
5561 bdrv_unref(old_bs);
5562
5563 return ret;
5564}
5565
Fam Zheng4f6fd342013-08-23 09:14:47 +08005566static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00005567{
Fam Zheng3718d8a2014-05-23 21:29:43 +08005568 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08005569 assert(!bs->refcnt);
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005570 GLOBAL_STATE_CODE();
Markus Armbruster18846de2010-06-29 16:58:30 +02005571
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01005572 /* remove from list, if necessary */
Kevin Wolf63eaaae2016-03-18 10:46:57 +01005573 if (bs->node_name[0] != '\0') {
5574 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
5575 }
Max Reitz2c1d04e2016-01-29 16:36:11 +01005576 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
5577
Anton Kuchin30c321f2019-05-07 11:12:56 +03005578 bdrv_close(bs);
5579
Stefan Hajnoczifa9185f2023-08-08 11:58:52 -04005580 qemu_mutex_destroy(&bs->reqs_lock);
5581
Anthony Liguori7267c092011-08-20 22:09:37 -05005582 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00005583}
5584
Vladimir Sementsov-Ogievskiy96796fa2021-09-20 14:55:36 +03005585
5586/*
5587 * Replace @bs by newly created block node.
5588 *
5589 * @options is a QDict of options to pass to the block drivers, or NULL for an
5590 * empty set of options. The reference to the QDict belongs to the block layer
5591 * after the call (even on failure), so if the caller intends to reuse the
5592 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
Kevin Wolf88234072023-05-25 14:47:11 +02005593 *
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05005594 * The caller must make sure that @bs stays in the same AioContext, i.e.
5595 * @options must not refer to nodes in a different AioContext.
Vladimir Sementsov-Ogievskiy96796fa2021-09-20 14:55:36 +03005596 */
5597BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005598 int flags, Error **errp)
5599{
Vladimir Sementsov-Ogievskiyf053b7e2021-09-20 14:55:35 +03005600 ERRP_GUARD();
5601 int ret;
Kevin Wolf88234072023-05-25 14:47:11 +02005602 AioContext *ctx = bdrv_get_aio_context(bs);
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005603 BlockDriverState *new_node_bs = NULL;
5604 const char *drvname, *node_name;
5605 BlockDriver *drv;
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005606
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005607 drvname = qdict_get_try_str(options, "driver");
5608 if (!drvname) {
5609 error_setg(errp, "driver is not specified");
5610 goto fail;
5611 }
5612
5613 drv = bdrv_find_format(drvname);
5614 if (!drv) {
5615 error_setg(errp, "Unknown driver: '%s'", drvname);
5616 goto fail;
5617 }
5618
5619 node_name = qdict_get_try_str(options, "node-name");
5620
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005621 GLOBAL_STATE_CODE();
5622
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005623 new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
5624 errp);
Kevin Wolf88234072023-05-25 14:47:11 +02005625 assert(bdrv_get_aio_context(bs) == ctx);
5626
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005627 options = NULL; /* bdrv_new_open_driver() eats options */
5628 if (!new_node_bs) {
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005629 error_prepend(errp, "Could not create node: ");
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005630 goto fail;
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005631 }
5632
Kevin Wolfccd6a372023-10-27 17:53:25 +02005633 /*
5634 * Make sure that @bs doesn't go away until we have successfully attached
5635 * all of its parents to @new_node_bs and undrained it again.
5636 */
5637 bdrv_ref(bs);
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005638 bdrv_drained_begin(bs);
Kevin Wolfccd6a372023-10-27 17:53:25 +02005639 bdrv_drained_begin(new_node_bs);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005640 bdrv_graph_wrlock();
Vladimir Sementsov-Ogievskiyf053b7e2021-09-20 14:55:35 +03005641 ret = bdrv_replace_node(bs, new_node_bs, errp);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005642 bdrv_graph_wrunlock();
Kevin Wolfccd6a372023-10-27 17:53:25 +02005643 bdrv_drained_end(new_node_bs);
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005644 bdrv_drained_end(bs);
Kevin Wolfccd6a372023-10-27 17:53:25 +02005645 bdrv_unref(bs);
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005646
Vladimir Sementsov-Ogievskiyf053b7e2021-09-20 14:55:35 +03005647 if (ret < 0) {
5648 error_prepend(errp, "Could not replace node: ");
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005649 goto fail;
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005650 }
5651
5652 return new_node_bs;
Vladimir Sementsov-Ogievskiyb11c8732021-09-20 14:55:37 +03005653
5654fail:
5655 qobject_unref(options);
5656 bdrv_unref(new_node_bs);
5657 return NULL;
Andrey Shinkevich8872ef72020-12-16 09:16:52 +03005658}
5659
aliguorie97fc192009-04-21 23:11:50 +00005660/*
5661 * Run consistency checks on an image
5662 *
Kevin Wolfe076f332010-06-29 11:43:13 +02005663 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02005664 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02005665 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00005666 */
Vladimir Sementsov-Ogievskiy21c22832020-09-24 21:54:10 +03005667int coroutine_fn bdrv_co_check(BlockDriverState *bs,
5668 BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00005669{
Emanuele Giuseppe Esposito1581a702022-03-03 10:16:09 -05005670 IO_CODE();
Kevin Wolf1b3ff9f2022-12-07 14:18:38 +01005671 assert_bdrv_graph_readable();
Max Reitz908bcd52014-08-07 22:47:55 +02005672 if (bs->drv == NULL) {
5673 return -ENOMEDIUM;
5674 }
Paolo Bonzini2fd61632018-03-01 17:36:19 +01005675 if (bs->drv->bdrv_co_check == NULL) {
aliguorie97fc192009-04-21 23:11:50 +00005676 return -ENOTSUP;
5677 }
5678
Kevin Wolfe076f332010-06-29 11:43:13 +02005679 memset(res, 0, sizeof(*res));
Paolo Bonzini2fd61632018-03-01 17:36:19 +01005680 return bs->drv->bdrv_co_check(bs, res, fix);
5681}
5682
Kevin Wolf756e6732010-01-12 12:55:17 +01005683/*
5684 * Return values:
5685 * 0 - success
5686 * -EINVAL - backing format specified, but no file
5687 * -ENOSPC - can't update the backing file because no space is left in the
5688 * image file header
5689 * -ENOTSUP - format driver doesn't support changing the backing file
5690 */
Kevin Wolfe2dd2732023-10-27 17:53:28 +02005691int coroutine_fn
5692bdrv_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
5693 const char *backing_fmt, bool require)
Kevin Wolf756e6732010-01-12 12:55:17 +01005694{
5695 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02005696 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01005697
Kevin Wolfe2dd2732023-10-27 17:53:28 +02005698 IO_CODE();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005699
Max Reitzd470ad42017-11-10 21:31:09 +01005700 if (!drv) {
5701 return -ENOMEDIUM;
5702 }
5703
Paolo Bonzini5f377792012-04-12 14:01:01 +02005704 /* Backing file format doesn't make sense without a backing file */
5705 if (backing_fmt && !backing_file) {
5706 return -EINVAL;
5707 }
5708
Eric Blake497a30d2021-05-03 14:36:00 -07005709 if (require && backing_file && !backing_fmt) {
5710 return -EINVAL;
Eric Blakee54ee1b2020-07-06 15:39:53 -05005711 }
5712
Kevin Wolfe2dd2732023-10-27 17:53:28 +02005713 if (drv->bdrv_co_change_backing_file != NULL) {
5714 ret = drv->bdrv_co_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01005715 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02005716 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01005717 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02005718
5719 if (ret == 0) {
5720 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
5721 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
Max Reitz998c2012019-02-01 20:29:08 +01005722 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
5723 backing_file ?: "");
Paolo Bonzini469ef352012-04-12 14:01:02 +02005724 }
5725 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01005726}
5727
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005728/*
Max Reitzdcf3f9b2019-06-12 17:34:45 +02005729 * Finds the first non-filter node above bs in the chain between
5730 * active and bs. The returned node is either an immediate parent of
5731 * bs, or there are only filter nodes between the two.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005732 *
5733 * Returns NULL if bs is not found in active's image chain,
5734 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04005735 *
5736 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005737 */
5738BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
5739 BlockDriverState *bs)
5740{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005741
5742 GLOBAL_STATE_CODE();
5743
Max Reitzdcf3f9b2019-06-12 17:34:45 +02005744 bs = bdrv_skip_filters(bs);
5745 active = bdrv_skip_filters(active);
5746
5747 while (active) {
5748 BlockDriverState *next = bdrv_backing_chain_next(active);
5749 if (bs == next) {
5750 return active;
5751 }
5752 active = next;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005753 }
5754
Max Reitzdcf3f9b2019-06-12 17:34:45 +02005755 return NULL;
Jeff Cody4caf0fc2014-06-25 15:35:26 -04005756}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005757
Jeff Cody4caf0fc2014-06-25 15:35:26 -04005758/* Given a BDS, searches for the base layer. */
5759BlockDriverState *bdrv_find_base(BlockDriverState *bs)
5760{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005761 GLOBAL_STATE_CODE();
5762
Jeff Cody4caf0fc2014-06-25 15:35:26 -04005763 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005764}
5765
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005766/*
Max Reitz7b99a262019-06-12 16:07:11 +02005767 * Return true if at least one of the COW (backing) and filter links
5768 * between @bs and @base is frozen. @errp is set if that's the case.
Alberto Garcia0f0998f2019-03-28 18:25:09 +02005769 * @base must be reachable from @bs, or NULL.
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005770 */
Kevin Wolf9275fc72023-10-27 17:53:18 +02005771static bool GRAPH_RDLOCK
5772bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
5773 Error **errp)
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005774{
5775 BlockDriverState *i;
Max Reitz7b99a262019-06-12 16:07:11 +02005776 BdrvChild *child;
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005777
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005778 GLOBAL_STATE_CODE();
5779
Max Reitz7b99a262019-06-12 16:07:11 +02005780 for (i = bs; i != base; i = child_bs(child)) {
5781 child = bdrv_filter_or_cow_child(i);
5782
5783 if (child && child->frozen) {
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005784 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
Max Reitz7b99a262019-06-12 16:07:11 +02005785 child->name, i->node_name, child->bs->node_name);
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005786 return true;
5787 }
5788 }
5789
5790 return false;
5791}
5792
5793/*
Max Reitz7b99a262019-06-12 16:07:11 +02005794 * Freeze all COW (backing) and filter links between @bs and @base.
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005795 * If any of the links is already frozen the operation is aborted and
5796 * none of the links are modified.
Alberto Garcia0f0998f2019-03-28 18:25:09 +02005797 * @base must be reachable from @bs, or NULL.
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005798 * Returns 0 on success. On failure returns < 0 and sets @errp.
5799 */
5800int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
5801 Error **errp)
5802{
5803 BlockDriverState *i;
Max Reitz7b99a262019-06-12 16:07:11 +02005804 BdrvChild *child;
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005805
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005806 GLOBAL_STATE_CODE();
5807
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005808 if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
5809 return -EPERM;
5810 }
5811
Max Reitz7b99a262019-06-12 16:07:11 +02005812 for (i = bs; i != base; i = child_bs(child)) {
5813 child = bdrv_filter_or_cow_child(i);
5814 if (child && child->bs->never_freeze) {
Max Reitze5182c12019-07-03 19:28:02 +02005815 error_setg(errp, "Cannot freeze '%s' link to '%s'",
Max Reitz7b99a262019-06-12 16:07:11 +02005816 child->name, child->bs->node_name);
Max Reitze5182c12019-07-03 19:28:02 +02005817 return -EPERM;
5818 }
5819 }
5820
Max Reitz7b99a262019-06-12 16:07:11 +02005821 for (i = bs; i != base; i = child_bs(child)) {
5822 child = bdrv_filter_or_cow_child(i);
5823 if (child) {
5824 child->frozen = true;
Alberto Garcia0f0998f2019-03-28 18:25:09 +02005825 }
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005826 }
5827
5828 return 0;
5829}
5830
5831/*
Max Reitz7b99a262019-06-12 16:07:11 +02005832 * Unfreeze all COW (backing) and filter links between @bs and @base.
5833 * The caller must ensure that all links are frozen before using this
5834 * function.
Alberto Garcia0f0998f2019-03-28 18:25:09 +02005835 * @base must be reachable from @bs, or NULL.
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005836 */
5837void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
5838{
5839 BlockDriverState *i;
Max Reitz7b99a262019-06-12 16:07:11 +02005840 BdrvChild *child;
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005841
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005842 GLOBAL_STATE_CODE();
5843
Max Reitz7b99a262019-06-12 16:07:11 +02005844 for (i = bs; i != base; i = child_bs(child)) {
5845 child = bdrv_filter_or_cow_child(i);
5846 if (child) {
5847 assert(child->frozen);
5848 child->frozen = false;
Alberto Garcia0f0998f2019-03-28 18:25:09 +02005849 }
Alberto Garcia2cad1eb2019-03-12 18:48:40 +02005850 }
5851}
5852
5853/*
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005854 * Drops images above 'base' up to and including 'top', and sets the image
5855 * above 'top' to have base as its backing file.
5856 *
5857 * Requires that the overlay to 'top' is opened r/w, so that the backing file
5858 * information in 'bs' can be properly updated.
5859 *
5860 * E.g., this will convert the following chain:
5861 * bottom <- base <- intermediate <- top <- active
5862 *
5863 * to
5864 *
5865 * bottom <- base <- active
5866 *
5867 * It is allowed for bottom==base, in which case it converts:
5868 *
5869 * base <- intermediate <- top <- active
5870 *
5871 * to
5872 *
5873 * base <- active
5874 *
Jeff Cody54e26902014-06-25 15:40:10 -04005875 * If backing_file_str is non-NULL, it will be used when modifying top's
5876 * overlay image metadata.
5877 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005878 * Error conditions:
5879 * if active == top, that is considered an error
5880 *
5881 */
Kevin Wolfbde70712017-06-27 20:36:18 +02005882int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
Peter Krempa4b028cb2023-12-05 18:14:41 +01005883 const char *backing_file_str,
5884 bool backing_mask_protocol)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005885{
Alberto Garcia6bd858b2018-10-31 18:16:38 +02005886 BlockDriverState *explicit_top = top;
5887 bool update_inherits_from;
Vladimir Sementsov-Ogievskiyd669ed62020-11-06 15:42:37 +03005888 BdrvChild *c;
Kevin Wolf12fa4af2017-02-17 20:42:32 +01005889 Error *local_err = NULL;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005890 int ret = -EIO;
Vladimir Sementsov-Ogievskiyd669ed62020-11-06 15:42:37 +03005891 g_autoptr(GSList) updated_children = NULL;
5892 GSList *p;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005893
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05005894 GLOBAL_STATE_CODE();
5895
Kevin Wolf6858eba2017-06-29 19:32:21 +02005896 bdrv_ref(top);
Kevin Wolf631086d2022-11-18 18:41:03 +01005897 bdrv_drained_begin(base);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005898 bdrv_graph_wrlock();
Kevin Wolf6858eba2017-06-29 19:32:21 +02005899
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005900 if (!top->drv || !base->drv) {
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005901 goto exit_wrlock;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005902 }
5903
Kevin Wolf5db15a52015-09-14 15:33:33 +02005904 /* Make sure that base is in the backing chain of top */
5905 if (!bdrv_chain_contains(top, base)) {
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005906 goto exit_wrlock;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005907 }
5908
Alberto Garcia6bd858b2018-10-31 18:16:38 +02005909 /* If 'base' recursively inherits from 'top' then we should set
5910 * base->inherits_from to top->inherits_from after 'top' and all
5911 * other intermediate nodes have been dropped.
5912 * If 'top' is an implicit node (e.g. "commit_top") we should skip
5913 * it because no one inherits from it. We use explicit_top for that. */
Max Reitzdcf3f9b2019-06-12 17:34:45 +02005914 explicit_top = bdrv_skip_implicit_filters(explicit_top);
Alberto Garcia6bd858b2018-10-31 18:16:38 +02005915 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
5916
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005917 /* success - we can delete the intermediate states, and link top->base */
Max Reitzf30c66b2019-02-01 20:29:05 +01005918 if (!backing_file_str) {
5919 bdrv_refresh_filename(base);
5920 backing_file_str = base->filename;
5921 }
Kevin Wolf61f09ce2017-09-19 16:22:54 +02005922
Vladimir Sementsov-Ogievskiyd669ed62020-11-06 15:42:37 +03005923 QLIST_FOREACH(c, &top->parents, next_parent) {
5924 updated_children = g_slist_prepend(updated_children, c);
5925 }
Kevin Wolf12fa4af2017-02-17 20:42:32 +01005926
Vladimir Sementsov-Ogievskiy3108a152021-04-28 18:17:51 +03005927 /*
5928 * It seems correct to pass detach_subchain=true here, but it triggers
5929 * one more yet not fixed bug, when due to nested aio_poll loop we switch to
5930 * another drained section, which modify the graph (for example, removing
5931 * the child, which we keep in updated_children list). So, it's a TODO.
5932 *
5933 * Note, bug triggered if pass detach_subchain=true here and run
5934 * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
5935 * That's a FIXME.
5936 */
5937 bdrv_replace_node_common(top, base, false, false, &local_err);
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005938 bdrv_graph_wrunlock();
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005939
Vladimir Sementsov-Ogievskiyd669ed62020-11-06 15:42:37 +03005940 if (local_err) {
5941 error_report_err(local_err);
5942 goto exit;
5943 }
5944
5945 for (p = updated_children; p; p = p->next) {
5946 c = p->data;
5947
Max Reitzbd86fb92020-05-13 13:05:13 +02005948 if (c->klass->update_filename) {
5949 ret = c->klass->update_filename(c, base, backing_file_str,
Peter Krempa4b028cb2023-12-05 18:14:41 +01005950 backing_mask_protocol,
Max Reitzbd86fb92020-05-13 13:05:13 +02005951 &local_err);
Kevin Wolf61f09ce2017-09-19 16:22:54 +02005952 if (ret < 0) {
Vladimir Sementsov-Ogievskiyd669ed62020-11-06 15:42:37 +03005953 /*
5954 * TODO: Actually, we want to rollback all previous iterations
5955 * of this loop, and (which is almost impossible) previous
5956 * bdrv_replace_node()...
5957 *
5958 * Note, that c->klass->update_filename may lead to permission
5959 * update, so it's a bad idea to call it inside permission
5960 * update transaction of bdrv_replace_node.
5961 */
Kevin Wolf61f09ce2017-09-19 16:22:54 +02005962 error_report_err(local_err);
5963 goto exit;
5964 }
5965 }
Kevin Wolf12fa4af2017-02-17 20:42:32 +01005966 }
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005967
Alberto Garcia6bd858b2018-10-31 18:16:38 +02005968 if (update_inherits_from) {
5969 base->inherits_from = explicit_top->inherits_from;
5970 }
5971
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005972 ret = 0;
Kevin Wolf5c0ef492023-10-27 17:53:24 +02005973 goto exit;
5974
5975exit_wrlock:
Stefan Hajnoczi6bc30f12023-12-05 13:20:02 -05005976 bdrv_graph_wrunlock();
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005977exit:
Kevin Wolf631086d2022-11-18 18:41:03 +01005978 bdrv_drained_end(base);
Kevin Wolf6858eba2017-06-29 19:32:21 +02005979 bdrv_unref(top);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04005980 return ret;
5981}
5982
bellard83f64092006-08-01 16:21:11 +00005983/**
Emanuele Giuseppe Esposito82618d72023-01-13 21:42:07 +01005984 * Implementation of BlockDriver.bdrv_co_get_allocated_file_size() that
Max Reitz081e4652019-06-12 18:14:13 +02005985 * sums the size of all data-bearing children. (This excludes backing
5986 * children.)
5987 */
Emanuele Giuseppe Espositode335632023-05-04 13:57:43 +02005988static int64_t coroutine_fn GRAPH_RDLOCK
5989bdrv_sum_allocated_file_size(BlockDriverState *bs)
Max Reitz081e4652019-06-12 18:14:13 +02005990{
5991 BdrvChild *child;
5992 int64_t child_size, sum = 0;
5993
5994 QLIST_FOREACH(child, &bs->children, next) {
5995 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5996 BDRV_CHILD_FILTERED))
5997 {
Emanuele Giuseppe Esposito82618d72023-01-13 21:42:07 +01005998 child_size = bdrv_co_get_allocated_file_size(child->bs);
Max Reitz081e4652019-06-12 18:14:13 +02005999 if (child_size < 0) {
6000 return child_size;
6001 }
6002 sum += child_size;
6003 }
6004 }
6005
6006 return sum;
6007}
6008
6009/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006010 * Length of a allocated file in bytes. Sparse files are counted by actual
6011 * allocated space. Return < 0 if error or unknown.
6012 */
Emanuele Giuseppe Esposito82618d72023-01-13 21:42:07 +01006013int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs)
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006014{
6015 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006016 IO_CODE();
Emanuele Giuseppe Espositode335632023-05-04 13:57:43 +02006017 assert_bdrv_graph_readable();
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006018
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006019 if (!drv) {
6020 return -ENOMEDIUM;
6021 }
Emanuele Giuseppe Esposito82618d72023-01-13 21:42:07 +01006022 if (drv->bdrv_co_get_allocated_file_size) {
6023 return drv->bdrv_co_get_allocated_file_size(bs);
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006024 }
Max Reitz081e4652019-06-12 18:14:13 +02006025
Paolo Bonzini41770f62022-11-24 16:21:18 +01006026 if (drv->protocol_name) {
Max Reitz081e4652019-06-12 18:14:13 +02006027 /*
6028 * Protocol drivers default to -ENOTSUP (most of their data is
6029 * not stored in any of their children (if they even have any),
6030 * so there is no generic way to figure it out).
6031 */
6032 return -ENOTSUP;
6033 } else if (drv->is_filter) {
6034 /* Filter drivers default to the size of their filtered child */
Emanuele Giuseppe Esposito82618d72023-01-13 21:42:07 +01006035 return bdrv_co_get_allocated_file_size(bdrv_filter_bs(bs));
Max Reitz081e4652019-06-12 18:14:13 +02006036 } else {
6037 /* Other drivers default to summing their children's sizes */
6038 return bdrv_sum_allocated_file_size(bs);
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006039 }
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006040}
6041
Stefan Hajnoczi90880ff2017-07-05 13:57:30 +01006042/*
6043 * bdrv_measure:
6044 * @drv: Format driver
6045 * @opts: Creation options for new image
6046 * @in_bs: Existing image containing data for new image (may be NULL)
6047 * @errp: Error object
6048 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
6049 * or NULL on error
6050 *
6051 * Calculate file size required to create a new image.
6052 *
6053 * If @in_bs is given then space for allocated clusters and zero clusters
6054 * from that image are included in the calculation. If @opts contains a
6055 * backing file that is shared by @in_bs then backing clusters may be omitted
6056 * from the calculation.
6057 *
6058 * If @in_bs is NULL then the calculation includes no allocated clusters
6059 * unless a preallocation option is given in @opts.
6060 *
6061 * Note that @in_bs may use a different BlockDriver from @drv.
6062 *
6063 * If an error occurs the @errp pointer is set.
6064 */
6065BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
6066 BlockDriverState *in_bs, Error **errp)
6067{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006068 IO_CODE();
Stefan Hajnoczi90880ff2017-07-05 13:57:30 +01006069 if (!drv->bdrv_measure) {
6070 error_setg(errp, "Block driver '%s' does not support size measurement",
6071 drv->format_name);
6072 return NULL;
6073 }
6074
6075 return drv->bdrv_measure(opts, in_bs, errp);
6076}
6077
Fam Zheng4a1d5e12011-07-12 19:56:39 +08006078/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006079 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00006080 */
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01006081int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00006082{
6083 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006084 IO_CODE();
Kevin Wolf8ab81402023-02-03 16:22:02 +01006085 assert_bdrv_graph_readable();
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006086
bellard83f64092006-08-01 16:21:11 +00006087 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00006088 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01006089
Paolo Bonzini160a29e2023-04-07 17:32:56 +02006090 if (bs->bl.has_variable_length) {
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01006091 int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
Kevin Wolfb94a2612013-10-29 12:18:58 +01006092 if (ret < 0) {
6093 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01006094 }
bellard83f64092006-08-01 16:21:11 +00006095 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006096 return bs->total_sectors;
6097}
6098
Paolo Bonzini81f730d2023-04-07 17:33:03 +02006099/*
6100 * This wrapper is written by hand because this function is in the hot I/O path,
6101 * via blk_get_geometry.
6102 */
6103int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs)
6104{
6105 BlockDriver *drv = bs->drv;
6106 IO_CODE();
6107
6108 if (!drv)
6109 return -ENOMEDIUM;
6110
6111 if (bs->bl.has_variable_length) {
6112 int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
6113 if (ret < 0) {
6114 return ret;
6115 }
6116 }
6117
6118 return bs->total_sectors;
6119}
6120
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006121/**
6122 * Return length in bytes on success, -errno on error.
6123 * The length is always a multiple of BDRV_SECTOR_SIZE.
6124 */
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01006125int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006126{
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01006127 int64_t ret;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006128 IO_CODE();
Kevin Wolf8ab81402023-02-03 16:22:02 +01006129 assert_bdrv_graph_readable();
Markus Armbruster65a9bb22014-06-26 13:23:17 +02006130
Emanuele Giuseppe Espositoc86422c2023-01-13 21:42:04 +01006131 ret = bdrv_co_nb_sectors(bs);
Eric Blake122860b2020-11-05 09:51:22 -06006132 if (ret < 0) {
6133 return ret;
6134 }
6135 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
6136 return -EFBIG;
6137 }
6138 return ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00006139}
6140
Eric Blake54115412016-06-23 16:37:26 -06006141bool bdrv_is_sg(BlockDriverState *bs)
ths985a03b2007-12-24 16:10:43 +00006142{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006143 IO_CODE();
ths985a03b2007-12-24 16:10:43 +00006144 return bs->sg;
6145}
6146
Max Reitzae23f782019-06-12 22:57:15 +02006147/**
6148 * Return whether the given node supports compressed writes.
6149 */
6150bool bdrv_supports_compressed_writes(BlockDriverState *bs)
6151{
6152 BlockDriverState *filtered;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006153 IO_CODE();
Max Reitzae23f782019-06-12 22:57:15 +02006154
6155 if (!bs->drv || !block_driver_can_compress(bs->drv)) {
6156 return false;
6157 }
6158
6159 filtered = bdrv_filter_bs(bs);
6160 if (filtered) {
6161 /*
6162 * Filters can only forward compressed writes, so we have to
6163 * check the child.
6164 */
6165 return bdrv_supports_compressed_writes(filtered);
6166 }
6167
6168 return true;
6169}
6170
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02006171const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00006172{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006173 IO_CODE();
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02006174 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00006175}
6176
/*
 * qsort() comparator for an array of C strings: @a and @b point at array
 * elements (i.e. at char pointers), which are compared with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(char *const *)a;
    const char *rhs = *(char *const *)b;

    return strcmp(lhs, rhs);
}
6181
ths5fafdf22007-09-16 21:08:06 +00006182void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +03006183 void *opaque, bool read_only)
bellardea2384d2004-08-01 21:59:26 +00006184{
6185 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04006186 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01006187 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04006188 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00006189
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006190 GLOBAL_STATE_CODE();
6191
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01006192 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04006193 if (drv->format_name) {
6194 bool found = false;
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +03006195
6196 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
6197 continue;
6198 }
6199
Markus Armbrusterfb2575f2023-09-21 14:13:11 +02006200 i = count;
Jeff Codye855e4f2014-04-28 18:29:54 -04006201 while (formats && i && !found) {
6202 found = !strcmp(formats[--i], drv->format_name);
6203 }
6204
6205 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02006206 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04006207 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04006208 }
6209 }
bellardea2384d2004-08-01 21:59:26 +00006210 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01006211
Max Reitzeb0df692016-10-12 22:49:06 +02006212 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
6213 const char *format_name = block_driver_modules[i].format_name;
6214
6215 if (format_name) {
6216 bool found = false;
6217 int j = count;
6218
Andrey Shinkevich9ac404c2019-03-07 16:33:58 +03006219 if (use_bdrv_whitelist &&
6220 !bdrv_format_is_whitelisted(format_name, read_only)) {
6221 continue;
6222 }
6223
Max Reitzeb0df692016-10-12 22:49:06 +02006224 while (formats && j && !found) {
6225 found = !strcmp(formats[--j], format_name);
6226 }
6227
6228 if (!found) {
6229 formats = g_renew(const char *, formats, count + 1);
6230 formats[count++] = format_name;
6231 }
6232 }
6233 }
6234
Stefan Hajnocziada42402014-08-27 12:08:55 +01006235 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
6236
6237 for (i = 0; i < count; i++) {
6238 it(opaque, formats[i]);
6239 }
6240
Jeff Codye855e4f2014-04-28 18:29:54 -04006241 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00006242}
6243
Benoît Canetdc364f42014-01-23 21:31:32 +01006244/* This function is to find a node in the bs graph */
6245BlockDriverState *bdrv_find_node(const char *node_name)
6246{
6247 BlockDriverState *bs;
6248
6249 assert(node_name);
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006250 GLOBAL_STATE_CODE();
Benoît Canetdc364f42014-01-23 21:31:32 +01006251
6252 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
6253 if (!strcmp(node_name, bs->node_name)) {
6254 return bs;
6255 }
6256 }
6257 return NULL;
6258}
6259
Benoît Canetc13163f2014-01-23 21:31:34 +01006260/* Put this QMP function here so it can access the static graph_bdrv_states. */
Peter Krempafacda542020-01-20 09:50:49 +01006261BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
6262 Error **errp)
Benoît Canetc13163f2014-01-23 21:31:34 +01006263{
Eric Blake9812e712020-10-27 00:05:47 -05006264 BlockDeviceInfoList *list;
Benoît Canetc13163f2014-01-23 21:31:34 +01006265 BlockDriverState *bs;
6266
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006267 GLOBAL_STATE_CODE();
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02006268 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006269
Benoît Canetc13163f2014-01-23 21:31:34 +01006270 list = NULL;
6271 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Peter Krempafacda542020-01-20 09:50:49 +01006272 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
Alberto Garciad5a8ee62015-04-17 14:52:43 +03006273 if (!info) {
6274 qapi_free_BlockDeviceInfoList(list);
6275 return NULL;
6276 }
Eric Blake9812e712020-10-27 00:05:47 -05006277 QAPI_LIST_PREPEND(list, info);
Benoît Canetc13163f2014-01-23 21:31:34 +01006278 }
6279
6280 return list;
6281}
6282
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006283typedef struct XDbgBlockGraphConstructor {
6284 XDbgBlockGraph *graph;
6285 GHashTable *graph_nodes;
6286} XDbgBlockGraphConstructor;
6287
6288static XDbgBlockGraphConstructor *xdbg_graph_new(void)
6289{
6290 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
6291
6292 gr->graph = g_new0(XDbgBlockGraph, 1);
6293 gr->graph_nodes = g_hash_table_new(NULL, NULL);
6294
6295 return gr;
6296}
6297
6298static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
6299{
6300 XDbgBlockGraph *graph = gr->graph;
6301
6302 g_hash_table_destroy(gr->graph_nodes);
6303 g_free(gr);
6304
6305 return graph;
6306}
6307
6308static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
6309{
6310 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
6311
6312 if (ret != 0) {
6313 return ret;
6314 }
6315
6316 /*
6317 * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
6318 * answer of g_hash_table_lookup.
6319 */
6320 ret = g_hash_table_size(gr->graph_nodes) + 1;
6321 g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
6322
6323 return ret;
6324}
6325
6326static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
6327 XDbgBlockGraphNodeType type, const char *name)
6328{
6329 XDbgBlockGraphNode *n;
6330
6331 n = g_new0(XDbgBlockGraphNode, 1);
6332
6333 n->id = xdbg_graph_node_num(gr, node);
6334 n->type = type;
6335 n->name = g_strdup(name);
6336
Eric Blake9812e712020-10-27 00:05:47 -05006337 QAPI_LIST_PREPEND(gr->graph->nodes, n);
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006338}
6339
6340static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
6341 const BdrvChild *child)
6342{
Max Reitzcdb1cec2019-11-08 13:34:52 +01006343 BlockPermission qapi_perm;
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006344 XDbgBlockGraphEdge *edge;
Emanuele Giuseppe Esposito862fded2022-03-03 10:15:55 -05006345 GLOBAL_STATE_CODE();
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006346
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006347 edge = g_new0(XDbgBlockGraphEdge, 1);
6348
6349 edge->parent = xdbg_graph_node_num(gr, parent);
6350 edge->child = xdbg_graph_node_num(gr, child->bs);
6351 edge->name = g_strdup(child->name);
6352
Max Reitzcdb1cec2019-11-08 13:34:52 +01006353 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
6354 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
6355
6356 if (flag & child->perm) {
Eric Blake9812e712020-10-27 00:05:47 -05006357 QAPI_LIST_PREPEND(edge->perm, qapi_perm);
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006358 }
Max Reitzcdb1cec2019-11-08 13:34:52 +01006359 if (flag & child->shared_perm) {
Eric Blake9812e712020-10-27 00:05:47 -05006360 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006361 }
6362 }
6363
Eric Blake9812e712020-10-27 00:05:47 -05006364 QAPI_LIST_PREPEND(gr->graph->edges, edge);
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006365}
6366
6367
6368XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
6369{
6370 BlockBackend *blk;
6371 BlockJob *job;
6372 BlockDriverState *bs;
6373 BdrvChild *child;
6374 XDbgBlockGraphConstructor *gr = xdbg_graph_new();
6375
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006376 GLOBAL_STATE_CODE();
6377
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006378 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
6379 char *allocated_name = NULL;
6380 const char *name = blk_name(blk);
6381
6382 if (!*name) {
6383 name = allocated_name = blk_get_attached_dev_id(blk);
6384 }
Markus Armbrusterbcd63b52024-09-04 13:18:20 +02006385 xdbg_graph_add_node(gr, blk, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006386 name);
6387 g_free(allocated_name);
6388 if (blk_root(blk)) {
6389 xdbg_graph_add_edge(gr, blk, blk_root(blk));
6390 }
6391 }
6392
Emanuele Giuseppe Esposito880eeec2022-09-26 05:32:04 -04006393 WITH_JOB_LOCK_GUARD() {
6394 for (job = block_job_next_locked(NULL); job;
6395 job = block_job_next_locked(job)) {
6396 GSList *el;
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006397
Markus Armbrusterbcd63b52024-09-04 13:18:20 +02006398 xdbg_graph_add_node(gr, job, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
Emanuele Giuseppe Esposito880eeec2022-09-26 05:32:04 -04006399 job->job.id);
6400 for (el = job->nodes; el; el = el->next) {
6401 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
6402 }
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006403 }
6404 }
6405
6406 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Markus Armbrusterbcd63b52024-09-04 13:18:20 +02006407 xdbg_graph_add_node(gr, bs, XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
Vladimir Sementsov-Ogievskiy5d3b4e92018-12-21 20:09:07 +03006408 bs->node_name);
6409 QLIST_FOREACH(child, &bs->children, next) {
6410 xdbg_graph_add_edge(gr, bs, child);
6411 }
6412 }
6413
6414 return xdbg_graph_finalize(gr);
6415}
6416
Benoît Canet12d3ba82014-01-23 21:31:35 +01006417BlockDriverState *bdrv_lookup_bs(const char *device,
6418 const char *node_name,
6419 Error **errp)
6420{
Markus Armbruster7f06d472014-10-07 13:59:12 +02006421 BlockBackend *blk;
6422 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01006423
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006424 GLOBAL_STATE_CODE();
6425
Benoît Canet12d3ba82014-01-23 21:31:35 +01006426 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02006427 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01006428
Markus Armbruster7f06d472014-10-07 13:59:12 +02006429 if (blk) {
Alberto Garcia9f4ed6f2015-10-26 16:46:49 +02006430 bs = blk_bs(blk);
6431 if (!bs) {
Max Reitz5433c242015-10-19 17:53:29 +02006432 error_setg(errp, "Device '%s' has no medium", device);
Max Reitz5433c242015-10-19 17:53:29 +02006433 }
6434
Alberto Garcia9f4ed6f2015-10-26 16:46:49 +02006435 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01006436 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01006437 }
6438
Benoît Canetdd67fa52014-02-12 17:15:06 +01006439 if (node_name) {
6440 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01006441
Benoît Canetdd67fa52014-02-12 17:15:06 +01006442 if (bs) {
6443 return bs;
6444 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01006445 }
6446
Connor Kuehl785ec4b2021-03-05 09:19:28 -06006447 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
Benoît Canetdd67fa52014-02-12 17:15:06 +01006448 device ? device : "",
6449 node_name ? node_name : "");
6450 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01006451}
6452
Jeff Cody5a6684d2014-06-25 15:40:09 -04006453/* If 'base' is in the same chain as 'top', return true. Otherwise,
6454 * return false. If either argument is NULL, return false. */
6455bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
6456{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006457
6458 GLOBAL_STATE_CODE();
6459
Jeff Cody5a6684d2014-06-25 15:40:09 -04006460 while (top && top != base) {
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006461 top = bdrv_filter_or_cow_bs(top);
Jeff Cody5a6684d2014-06-25 15:40:09 -04006462 }
6463
6464 return top != NULL;
6465}
6466
Fam Zheng04df7652014-10-31 11:32:54 +08006467BlockDriverState *bdrv_next_node(BlockDriverState *bs)
6468{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006469 GLOBAL_STATE_CODE();
Fam Zheng04df7652014-10-31 11:32:54 +08006470 if (!bs) {
6471 return QTAILQ_FIRST(&graph_bdrv_states);
6472 }
6473 return QTAILQ_NEXT(bs, node_list);
6474}
6475
Kevin Wolf0f122642018-03-28 18:29:18 +02006476BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
6477{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006478 GLOBAL_STATE_CODE();
Kevin Wolf0f122642018-03-28 18:29:18 +02006479 if (!bs) {
6480 return QTAILQ_FIRST(&all_bdrv_states);
6481 }
6482 return QTAILQ_NEXT(bs, bs_list);
6483}
6484
Fam Zheng20a9e772014-10-31 11:32:55 +08006485const char *bdrv_get_node_name(const BlockDriverState *bs)
6486{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006487 IO_CODE();
Fam Zheng20a9e772014-10-31 11:32:55 +08006488 return bs->node_name;
6489}
6490
Kevin Wolf1f0c4612016-03-22 18:38:44 +01006491const char *bdrv_get_parent_name(const BlockDriverState *bs)
Kevin Wolf4c265bf2016-02-26 10:22:16 +01006492{
6493 BdrvChild *c;
6494 const char *name;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05006495 IO_CODE();
Kevin Wolf4c265bf2016-02-26 10:22:16 +01006496
6497 /* If multiple parents have a name, just pick the first one. */
6498 QLIST_FOREACH(c, &bs->parents, next_parent) {
Max Reitzbd86fb92020-05-13 13:05:13 +02006499 if (c->klass->get_name) {
6500 name = c->klass->get_name(c);
Kevin Wolf4c265bf2016-02-26 10:22:16 +01006501 if (name && *name) {
6502 return name;
6503 }
6504 }
6505 }
6506
6507 return NULL;
6508}
6509
Markus Armbruster7f06d472014-10-07 13:59:12 +02006510/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02006511const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00006512{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006513 IO_CODE();
Kevin Wolf4c265bf2016-02-26 10:22:16 +01006514 return bdrv_get_parent_name(bs) ?: "";
bellardea2384d2004-08-01 21:59:26 +00006515}
6516
Alberto Garcia9b2aa842015-04-08 12:29:18 +03006517/* This can be used to identify nodes that might not have a device
6518 * name associated. Since node and device names live in the same
6519 * namespace, the result is unambiguous. The exception is if both are
6520 * absent, then this returns an empty (non-null) string. */
6521const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
6522{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006523 IO_CODE();
Kevin Wolf4c265bf2016-02-26 10:22:16 +01006524 return bdrv_get_parent_name(bs) ?: bs->node_name;
Alberto Garcia9b2aa842015-04-08 12:29:18 +03006525}
6526
Markus Armbrusterc8433282012-06-05 16:49:24 +02006527int bdrv_get_flags(BlockDriverState *bs)
6528{
Hanna Reitz15aee7a2022-04-27 13:40:54 +02006529 IO_CODE();
Markus Armbrusterc8433282012-06-05 16:49:24 +02006530 return bs->open_flags;
6531}
6532
Peter Lieven3ac21622013-06-28 12:47:42 +02006533int bdrv_has_zero_init_1(BlockDriverState *bs)
6534{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006535 GLOBAL_STATE_CODE();
Peter Lieven3ac21622013-06-28 12:47:42 +02006536 return 1;
6537}
6538
Kevin Wolf06717982023-10-27 17:53:11 +02006539int coroutine_mixed_fn bdrv_has_zero_init(BlockDriverState *bs)
Kevin Wolff2feebb2010-04-14 17:30:35 +02006540{
Max Reitz93393e62019-06-12 17:03:38 +02006541 BlockDriverState *filtered;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006542 GLOBAL_STATE_CODE();
Max Reitz93393e62019-06-12 17:03:38 +02006543
Max Reitzd470ad42017-11-10 21:31:09 +01006544 if (!bs->drv) {
6545 return 0;
6546 }
Kevin Wolff2feebb2010-04-14 17:30:35 +02006547
Paolo Bonzini11212d82013-09-04 19:00:27 +02006548 /* If BS is a copy on write image, it is initialized to
6549 the contents of the base image, which may not be zeroes. */
Max Reitz34778172019-06-12 17:10:46 +02006550 if (bdrv_cow_child(bs)) {
Paolo Bonzini11212d82013-09-04 19:00:27 +02006551 return 0;
6552 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02006553 if (bs->drv->bdrv_has_zero_init) {
6554 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02006555 }
Max Reitz93393e62019-06-12 17:03:38 +02006556
6557 filtered = bdrv_filter_bs(bs);
6558 if (filtered) {
6559 return bdrv_has_zero_init(filtered);
Manos Pitsidianakis5a612c02017-07-13 18:30:25 +03006560 }
Kevin Wolff2feebb2010-04-14 17:30:35 +02006561
Peter Lieven3ac21622013-06-28 12:47:42 +02006562 /* safe default */
6563 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02006564}
6565
Peter Lieven4ce78692013-10-24 12:06:54 +02006566bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
6567{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006568 IO_CODE();
Denis V. Lunev2f0342e2016-07-14 16:33:26 +03006569 if (!(bs->open_flags & BDRV_O_UNMAP)) {
Peter Lieven4ce78692013-10-24 12:06:54 +02006570 return false;
6571 }
6572
Eric Blakee24d8132018-01-26 13:34:39 -06006573 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
Peter Lieven4ce78692013-10-24 12:06:54 +02006574}
6575
ths5fafdf22007-09-16 21:08:06 +00006576void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00006577 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00006578{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006579 IO_CODE();
Kevin Wolf3574c602011-10-26 11:02:11 +02006580 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00006581}
6582
Emanuele Giuseppe Esposito3d47eb02023-01-13 21:42:08 +01006583int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
bellardfaea38e2006-08-05 21:31:00 +00006584{
Vladimir Sementsov-Ogievskiy8b117002020-12-04 01:27:13 +03006585 int ret;
bellardfaea38e2006-08-05 21:31:00 +00006586 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006587 IO_CODE();
Emanuele Giuseppe Espositoa00e70c2023-05-04 13:57:44 +02006588 assert_bdrv_graph_readable();
6589
Manos Pitsidianakis5a612c02017-07-13 18:30:25 +03006590 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
6591 if (!drv) {
bellard19cb3732006-08-19 11:45:59 +00006592 return -ENOMEDIUM;
Manos Pitsidianakis5a612c02017-07-13 18:30:25 +03006593 }
Emanuele Giuseppe Esposito3d47eb02023-01-13 21:42:08 +01006594 if (!drv->bdrv_co_get_info) {
Max Reitz93393e62019-06-12 17:03:38 +02006595 BlockDriverState *filtered = bdrv_filter_bs(bs);
6596 if (filtered) {
Emanuele Giuseppe Esposito3d47eb02023-01-13 21:42:08 +01006597 return bdrv_co_get_info(filtered, bdi);
Manos Pitsidianakis5a612c02017-07-13 18:30:25 +03006598 }
bellardfaea38e2006-08-05 21:31:00 +00006599 return -ENOTSUP;
Manos Pitsidianakis5a612c02017-07-13 18:30:25 +03006600 }
bellardfaea38e2006-08-05 21:31:00 +00006601 memset(bdi, 0, sizeof(*bdi));
Emanuele Giuseppe Esposito3d47eb02023-01-13 21:42:08 +01006602 ret = drv->bdrv_co_get_info(bs, bdi);
Andrey Drobyshevc54483b2023-07-11 20:25:51 +03006603 if (bdi->subcluster_size == 0) {
6604 /*
6605 * If the driver left this unset, subclusters are not supported.
6606 * Then it is safe to treat each cluster as having only one subcluster.
6607 */
6608 bdi->subcluster_size = bdi->cluster_size;
6609 }
Vladimir Sementsov-Ogievskiy8b117002020-12-04 01:27:13 +03006610 if (ret < 0) {
6611 return ret;
6612 }
6613
6614 if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
6615 return -EINVAL;
6616 }
6617
6618 return 0;
bellardfaea38e2006-08-05 21:31:00 +00006619}
6620
Andrey Shinkevich1bf6e9c2019-02-08 18:06:06 +03006621ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
6622 Error **errp)
Max Reitzeae041f2013-10-09 10:46:16 +02006623{
6624 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006625 IO_CODE();
Max Reitzeae041f2013-10-09 10:46:16 +02006626 if (drv && drv->bdrv_get_specific_info) {
Andrey Shinkevich1bf6e9c2019-02-08 18:06:06 +03006627 return drv->bdrv_get_specific_info(bs, errp);
Max Reitzeae041f2013-10-09 10:46:16 +02006628 }
6629 return NULL;
6630}
6631
Anton Nefedovd9245592019-09-23 15:17:37 +03006632BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
6633{
6634 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006635 IO_CODE();
Anton Nefedovd9245592019-09-23 15:17:37 +03006636 if (!drv || !drv->bdrv_get_specific_stats) {
6637 return NULL;
6638 }
6639 return drv->bdrv_get_specific_stats(bs);
6640}
6641
Emanuele Giuseppe Espositoc834dc02023-01-13 21:42:11 +01006642void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01006643{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05006644 IO_CODE();
Emanuele Giuseppe Espositocb2bfaa2023-05-04 13:57:45 +02006645 assert_bdrv_graph_readable();
6646
Emanuele Giuseppe Espositoc834dc02023-01-13 21:42:11 +01006647 if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01006648 return;
6649 }
6650
Emanuele Giuseppe Espositoc834dc02023-01-13 21:42:11 +01006651 bs->drv->bdrv_co_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01006652}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01006653
Kevin Wolfc0fc5122023-09-29 16:51:46 +02006654static BlockDriverState * GRAPH_RDLOCK
6655bdrv_find_debug_node(BlockDriverState *bs)
Kevin Wolf41c695c2012-12-06 14:32:58 +01006656{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05006657 GLOBAL_STATE_CODE();
Kevin Wolf41c695c2012-12-06 14:32:58 +01006658 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
Max Reitzf706a922019-06-12 17:42:13 +02006659 bs = bdrv_primary_bs(bs);
Kevin Wolf41c695c2012-12-06 14:32:58 +01006660 }
6661
6662 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
Vladimir Sementsov-Ogievskiyd10529a2019-09-20 17:20:49 +03006663 assert(bs->drv->bdrv_debug_remove_breakpoint);
6664 return bs;
6665 }
6666
6667 return NULL;
6668}
6669
6670int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
6671 const char *tag)
6672{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006673 GLOBAL_STATE_CODE();
Kevin Wolfc0fc5122023-09-29 16:51:46 +02006674 GRAPH_RDLOCK_GUARD_MAINLOOP();
6675
Vladimir Sementsov-Ogievskiyd10529a2019-09-20 17:20:49 +03006676 bs = bdrv_find_debug_node(bs);
6677 if (bs) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01006678 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
6679 }
6680
6681 return -ENOTSUP;
6682}
6683
Fam Zheng4cc70e92013-11-20 10:01:54 +08006684int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
6685{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006686 GLOBAL_STATE_CODE();
Kevin Wolfc0fc5122023-09-29 16:51:46 +02006687 GRAPH_RDLOCK_GUARD_MAINLOOP();
6688
Vladimir Sementsov-Ogievskiyd10529a2019-09-20 17:20:49 +03006689 bs = bdrv_find_debug_node(bs);
6690 if (bs) {
Fam Zheng4cc70e92013-11-20 10:01:54 +08006691 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
6692 }
6693
6694 return -ENOTSUP;
6695}
6696
Kevin Wolf41c695c2012-12-06 14:32:58 +01006697int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
6698{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006699 GLOBAL_STATE_CODE();
Kevin Wolfc0fc5122023-09-29 16:51:46 +02006700 GRAPH_RDLOCK_GUARD_MAINLOOP();
6701
Max Reitz938789e2014-03-10 23:44:08 +01006702 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Max Reitzf706a922019-06-12 17:42:13 +02006703 bs = bdrv_primary_bs(bs);
Kevin Wolf41c695c2012-12-06 14:32:58 +01006704 }
6705
6706 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
6707 return bs->drv->bdrv_debug_resume(bs, tag);
6708 }
6709
6710 return -ENOTSUP;
6711}
6712
6713bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
6714{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006715 GLOBAL_STATE_CODE();
Kevin Wolfc0fc5122023-09-29 16:51:46 +02006716 GRAPH_RDLOCK_GUARD_MAINLOOP();
6717
Kevin Wolf41c695c2012-12-06 14:32:58 +01006718 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
Max Reitzf706a922019-06-12 17:42:13 +02006719 bs = bdrv_primary_bs(bs);
Kevin Wolf41c695c2012-12-06 14:32:58 +01006720 }
6721
6722 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
6723 return bs->drv->bdrv_debug_is_suspended(bs, tag);
6724 }
6725
6726 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01006727}
6728
Jeff Codyb1b1d782012-10-16 15:49:09 -04006729/* backing_file can either be relative, or absolute, or a protocol. If it is
6730 * relative, it must be relative to the chain. So, passing in bs->filename
6731 * from a BDS as backing_file should not be done, as that may be relative to
6732 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00006733BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
6734 const char *backing_file)
6735{
Jeff Codyb1b1d782012-10-16 15:49:09 -04006736 char *filename_full = NULL;
6737 char *backing_file_full = NULL;
6738 char *filename_tmp = NULL;
6739 int is_protocol = 0;
Max Reitz0b877d02018-08-01 20:34:11 +02006740 bool filenames_refreshed = false;
Jeff Codyb1b1d782012-10-16 15:49:09 -04006741 BlockDriverState *curr_bs = NULL;
6742 BlockDriverState *retval = NULL;
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006743 BlockDriverState *bs_below;
Jeff Codyb1b1d782012-10-16 15:49:09 -04006744
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006745 GLOBAL_STATE_CODE();
Kevin Wolfb7cfc7d2023-09-29 16:51:45 +02006746 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006747
Jeff Codyb1b1d782012-10-16 15:49:09 -04006748 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00006749 return NULL;
6750 }
6751
Jeff Codyb1b1d782012-10-16 15:49:09 -04006752 filename_full = g_malloc(PATH_MAX);
6753 backing_file_full = g_malloc(PATH_MAX);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006754
6755 is_protocol = path_has_protocol(backing_file);
6756
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006757 /*
6758 * Being largely a legacy function, skip any filters here
6759 * (because filters do not have normal filenames, so they cannot
6760 * match anyway; and allowing json:{} filenames is a bit out of
6761 * scope).
6762 */
6763 for (curr_bs = bdrv_skip_filters(bs);
6764 bdrv_cow_child(curr_bs) != NULL;
6765 curr_bs = bs_below)
6766 {
6767 bs_below = bdrv_backing_chain_next(curr_bs);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006768
Max Reitz0b877d02018-08-01 20:34:11 +02006769 if (bdrv_backing_overridden(curr_bs)) {
6770 /*
6771 * If the backing file was overridden, we can only compare
6772 * directly against the backing node's filename.
6773 */
6774
6775 if (!filenames_refreshed) {
6776 /*
6777 * This will automatically refresh all of the
6778 * filenames in the rest of the backing chain, so we
6779 * only need to do this once.
6780 */
6781 bdrv_refresh_filename(bs_below);
6782 filenames_refreshed = true;
6783 }
6784
6785 if (strcmp(backing_file, bs_below->filename) == 0) {
6786 retval = bs_below;
6787 break;
6788 }
6789 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
6790 /*
6791 * If either of the filename paths is actually a protocol, then
6792 * compare unmodified paths; otherwise make paths relative.
6793 */
Max Reitz6b6833c2019-02-01 20:29:15 +01006794 char *backing_file_full_ret;
6795
Jeff Codyb1b1d782012-10-16 15:49:09 -04006796 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006797 retval = bs_below;
Jeff Codyb1b1d782012-10-16 15:49:09 -04006798 break;
6799 }
Jeff Cody418661e2017-01-25 20:08:20 -05006800 /* Also check against the full backing filename for the image */
Max Reitz6b6833c2019-02-01 20:29:15 +01006801 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
6802 NULL);
6803 if (backing_file_full_ret) {
6804 bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
6805 g_free(backing_file_full_ret);
6806 if (equal) {
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006807 retval = bs_below;
Jeff Cody418661e2017-01-25 20:08:20 -05006808 break;
6809 }
Jeff Cody418661e2017-01-25 20:08:20 -05006810 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00006811 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04006812 /* If not an absolute filename path, make it relative to the current
6813 * image's filename path */
Max Reitz2d9158c2019-02-01 20:29:17 +01006814 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
6815 NULL);
6816 /* We are going to compare canonicalized absolute pathnames */
6817 if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
6818 g_free(filename_tmp);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006819 continue;
6820 }
Max Reitz2d9158c2019-02-01 20:29:17 +01006821 g_free(filename_tmp);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006822
6823 /* We need to make sure the backing filename we are comparing against
6824 * is relative to the current image filename (or absolute) */
Max Reitz2d9158c2019-02-01 20:29:17 +01006825 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
6826 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
6827 g_free(filename_tmp);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006828 continue;
6829 }
Max Reitz2d9158c2019-02-01 20:29:17 +01006830 g_free(filename_tmp);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006831
6832 if (strcmp(backing_file_full, filename_full) == 0) {
Max Reitzdcf3f9b2019-06-12 17:34:45 +02006833 retval = bs_below;
Jeff Codyb1b1d782012-10-16 15:49:09 -04006834 break;
6835 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00006836 }
6837 }
6838
Jeff Codyb1b1d782012-10-16 15:49:09 -04006839 g_free(filename_full);
6840 g_free(backing_file_full);
Jeff Codyb1b1d782012-10-16 15:49:09 -04006841 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00006842}
6843
bellardea2384d2004-08-01 21:59:26 +00006844void bdrv_init(void)
6845{
Kevin Wolfe5f05f82021-07-09 18:41:41 +02006846#ifdef CONFIG_BDRV_WHITELIST_TOOLS
6847 use_bdrv_whitelist = 1;
6848#endif
Anthony Liguori5efa9d52009-05-09 17:03:42 -05006849 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00006850}
pbrookce1a14d2006-08-07 02:38:06 +00006851
Markus Armbrustereb852012009-10-27 18:41:44 +01006852void bdrv_init_with_whitelist(void)
6853{
6854 use_bdrv_whitelist = 1;
6855 bdrv_init();
6856}
6857
Kevin Wolfaec81042025-02-04 22:13:52 +01006858bool bdrv_is_inactive(BlockDriverState *bs) {
6859 return bs->open_flags & BDRV_O_INACTIVE;
6860}
6861
Emanuele Giuseppe Espositoa94750d2022-02-09 05:54:50 -05006862int bdrv_activate(BlockDriverState *bs, Error **errp)
6863{
Kevin Wolf4417ab72017-05-04 18:52:37 +02006864 BdrvChild *child, *parent;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01006865 Error *local_err = NULL;
6866 int ret;
Vladimir Sementsov-Ogievskiy9c98f142018-10-29 16:23:17 -04006867 BdrvDirtyBitmap *bm;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01006868
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006869 GLOBAL_STATE_CODE();
Kevin Wolf3804e3c2023-09-11 11:46:12 +02006870 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006871
Kevin Wolf3456a8d2014-03-11 10:58:39 +01006872 if (!bs->drv) {
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006873 return -ENOMEDIUM;
Anthony Liguori0f154232011-11-14 15:09:45 -06006874 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01006875
Vladimir Sementsov-Ogievskiy16e977d2017-01-31 14:23:08 +03006876 QLIST_FOREACH(child, &bs->children, next) {
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006877 bdrv_activate(child->bs, &local_err);
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006878 if (local_err) {
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006879 error_propagate(errp, local_err);
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006880 return -EINVAL;
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006881 }
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01006882 }
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006883
Kevin Wolfdafe0962017-11-16 13:00:01 +01006884 /*
6885 * Update permissions, they may differ for inactive nodes.
6886 *
6887 * Note that the required permissions of inactive images are always a
6888 * subset of the permissions required after activating the image. This
6889 * allows us to just get the permissions upfront without restricting
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006890 * bdrv_co_invalidate_cache().
Kevin Wolfdafe0962017-11-16 13:00:01 +01006891 *
6892 * It also means that in error cases, we don't have to try and revert to
6893 * the old permissions (which is an operation that could fail, too). We can
6894 * just keep the extended permissions for the next time that an activation
6895 * of the image is tried.
6896 */
Kevin Wolf7bb49412019-12-17 15:06:38 +01006897 if (bs->open_flags & BDRV_O_INACTIVE) {
6898 bs->open_flags &= ~BDRV_O_INACTIVE;
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03006899 ret = bdrv_refresh_perms(bs, NULL, errp);
Kevin Wolf7bb49412019-12-17 15:06:38 +01006900 if (ret < 0) {
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006901 bs->open_flags |= BDRV_O_INACTIVE;
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006902 return ret;
Fam Zheng0d1c5c92016-05-11 10:45:33 +08006903 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01006904
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006905 ret = bdrv_invalidate_cache(bs, errp);
6906 if (ret < 0) {
6907 bs->open_flags |= BDRV_O_INACTIVE;
6908 return ret;
Kevin Wolf7bb49412019-12-17 15:06:38 +01006909 }
Vladimir Sementsov-Ogievskiy9c98f142018-10-29 16:23:17 -04006910
Kevin Wolf7bb49412019-12-17 15:06:38 +01006911 FOR_EACH_DIRTY_BITMAP(bs, bm) {
6912 bdrv_dirty_bitmap_skip_store(bm, false);
6913 }
6914
Emanuele Giuseppe Espositoc0579602023-01-13 21:42:03 +01006915 ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
Kevin Wolf7bb49412019-12-17 15:06:38 +01006916 if (ret < 0) {
6917 bs->open_flags |= BDRV_O_INACTIVE;
6918 error_setg_errno(errp, -ret, "Could not refresh total sector count");
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006919 return ret;
Kevin Wolf7bb49412019-12-17 15:06:38 +01006920 }
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01006921 }
Kevin Wolf4417ab72017-05-04 18:52:37 +02006922
6923 QLIST_FOREACH(parent, &bs->parents, next_parent) {
Max Reitzbd86fb92020-05-13 13:05:13 +02006924 if (parent->klass->activate) {
6925 parent->klass->activate(parent, &local_err);
Kevin Wolf4417ab72017-05-04 18:52:37 +02006926 if (local_err) {
Kevin Wolf78fc3b32019-01-31 15:16:10 +01006927 bs->open_flags |= BDRV_O_INACTIVE;
Kevin Wolf4417ab72017-05-04 18:52:37 +02006928 error_propagate(errp, local_err);
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006929 return -EINVAL;
Kevin Wolf4417ab72017-05-04 18:52:37 +02006930 }
6931 }
6932 }
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006933
6934 return 0;
Anthony Liguori0f154232011-11-14 15:09:45 -06006935}
6936
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006937int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
6938{
6939 Error *local_err = NULL;
Emanuele Giuseppe Esposito1581a702022-03-03 10:16:09 -05006940 IO_CODE();
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006941
6942 assert(!(bs->open_flags & BDRV_O_INACTIVE));
Kevin Wolf1b3ff9f2022-12-07 14:18:38 +01006943 assert_bdrv_graph_readable();
Emanuele Giuseppe Esposito11d0c9b2022-02-09 05:54:52 -05006944
6945 if (bs->drv->bdrv_co_invalidate_cache) {
6946 bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
6947 if (local_err) {
6948 error_propagate(errp, local_err);
6949 return -EINVAL;
6950 }
6951 }
6952
6953 return 0;
6954}
6955
Emanuele Giuseppe Esposito3b717192022-02-09 05:54:51 -05006956void bdrv_activate_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06006957{
Kevin Wolf7c8eece2016-03-22 18:58:50 +01006958 BlockDriverState *bs;
Kevin Wolf88be7b42016-05-20 18:49:07 +02006959 BdrvNextIterator it;
Anthony Liguori0f154232011-11-14 15:09:45 -06006960
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006961 GLOBAL_STATE_CODE();
Kevin Wolf2b3912f2023-09-29 16:51:39 +02006962 GRAPH_RDLOCK_GUARD_MAINLOOP();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05006963
Kevin Wolf88be7b42016-05-20 18:49:07 +02006964 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006965 int ret;
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02006966
Emanuele Giuseppe Espositoa94750d2022-02-09 05:54:50 -05006967 ret = bdrv_activate(bs, errp);
Vladimir Sementsov-Ogievskiy54166452020-09-24 21:54:08 +03006968 if (ret < 0) {
Max Reitz5e003f12017-11-10 18:25:45 +01006969 bdrv_next_cleanup(&it);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01006970 return;
6971 }
Anthony Liguori0f154232011-11-14 15:09:45 -06006972 }
6973}
6974
Kevin Wolf0e6bad12023-09-29 16:51:38 +02006975static bool GRAPH_RDLOCK
6976bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
Kevin Wolf9e372712018-11-23 15:11:14 +01006977{
6978 BdrvChild *parent;
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05006979 GLOBAL_STATE_CODE();
Kevin Wolf9e372712018-11-23 15:11:14 +01006980
6981 QLIST_FOREACH(parent, &bs->parents, next_parent) {
Max Reitzbd86fb92020-05-13 13:05:13 +02006982 if (parent->klass->parent_is_bds) {
Kevin Wolf9e372712018-11-23 15:11:14 +01006983 BlockDriverState *parent_bs = parent->opaque;
6984 if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
6985 return true;
6986 }
6987 }
6988 }
6989
6990 return false;
6991}
6992
Kevin Wolfa6490ec2025-02-04 22:13:53 +01006993static int GRAPH_RDLOCK
6994bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level)
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01006995{
Kevin Wolfcfa1a572017-05-04 18:52:38 +02006996 BdrvChild *child, *parent;
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01006997 int ret;
Vladimir Sementsov-Ogievskiya13de402021-09-11 15:00:27 +03006998 uint64_t cumulative_perms, cumulative_shared_perms;
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01006999
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05007000 GLOBAL_STATE_CODE();
7001
Fiona Ebner841998e2025-05-30 17:10:41 +02007002 assert(bs->quiesce_counter > 0);
7003
Max Reitzd470ad42017-11-10 21:31:09 +01007004 if (!bs->drv) {
7005 return -ENOMEDIUM;
7006 }
7007
Kevin Wolf9e372712018-11-23 15:11:14 +01007008 /* Make sure that we don't inactivate a child before its parent.
7009 * It will be covered by recursion from the yet active parent. */
7010 if (bdrv_has_bds_parent(bs, true)) {
7011 return 0;
7012 }
7013
Kevin Wolfa6490ec2025-02-04 22:13:53 +01007014 /*
7015 * Inactivating an already inactive node on user request is harmless, but if
7016 * a child is already inactive before its parent, that's bad.
7017 */
7018 if (bs->open_flags & BDRV_O_INACTIVE) {
7019 assert(top_level);
7020 return 0;
7021 }
Kevin Wolf9e372712018-11-23 15:11:14 +01007022
7023 /* Inactivate this node */
7024 if (bs->drv->bdrv_inactivate) {
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007025 ret = bs->drv->bdrv_inactivate(bs);
7026 if (ret < 0) {
7027 return ret;
7028 }
7029 }
7030
Kevin Wolf9e372712018-11-23 15:11:14 +01007031 QLIST_FOREACH(parent, &bs->parents, next_parent) {
Max Reitzbd86fb92020-05-13 13:05:13 +02007032 if (parent->klass->inactivate) {
7033 ret = parent->klass->inactivate(parent);
Kevin Wolf9e372712018-11-23 15:11:14 +01007034 if (ret < 0) {
7035 return ret;
Kevin Wolfcfa1a572017-05-04 18:52:38 +02007036 }
7037 }
Fam Zhengaad0b7a2016-05-11 10:45:35 +08007038 }
Kevin Wolf38701b62017-05-04 18:52:39 +02007039
Vladimir Sementsov-Ogievskiya13de402021-09-11 15:00:27 +03007040 bdrv_get_cumulative_perm(bs, &cumulative_perms,
7041 &cumulative_shared_perms);
7042 if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
7043 /* Our inactive parents still need write access. Inactivation failed. */
7044 return -EPERM;
7045 }
7046
Kevin Wolf9e372712018-11-23 15:11:14 +01007047 bs->open_flags |= BDRV_O_INACTIVE;
7048
Vladimir Sementsov-Ogievskiybb87e4d2020-11-06 15:42:38 +03007049 /*
7050 * Update permissions, they may differ for inactive nodes.
7051 * We only tried to loosen restrictions, so errors are not fatal, ignore
7052 * them.
7053 */
Vladimir Sementsov-Ogievskiyf1316ed2022-11-07 19:35:57 +03007054 bdrv_refresh_perms(bs, NULL, NULL);
Kevin Wolf9e372712018-11-23 15:11:14 +01007055
7056 /* Recursively inactivate children */
Kevin Wolf38701b62017-05-04 18:52:39 +02007057 QLIST_FOREACH(child, &bs->children, next) {
Kevin Wolfa6490ec2025-02-04 22:13:53 +01007058 ret = bdrv_inactivate_recurse(child->bs, false);
Kevin Wolf38701b62017-05-04 18:52:39 +02007059 if (ret < 0) {
7060 return ret;
7061 }
7062 }
7063
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007064 return 0;
7065}
7066
Kevin Wolf8cd37202025-02-04 22:13:59 +01007067int bdrv_inactivate(BlockDriverState *bs, Error **errp)
7068{
7069 int ret;
7070
7071 GLOBAL_STATE_CODE();
Fiona Ebner841998e2025-05-30 17:10:41 +02007072
7073 bdrv_drain_all_begin();
7074 bdrv_graph_rdlock_main_loop();
Kevin Wolf8cd37202025-02-04 22:13:59 +01007075
7076 if (bdrv_has_bds_parent(bs, true)) {
7077 error_setg(errp, "Node has active parent node");
Fiona Ebner841998e2025-05-30 17:10:41 +02007078 ret = -EPERM;
7079 goto out;
Kevin Wolf8cd37202025-02-04 22:13:59 +01007080 }
7081
7082 ret = bdrv_inactivate_recurse(bs, true);
7083 if (ret < 0) {
7084 error_setg_errno(errp, -ret, "Failed to inactivate node");
Fiona Ebner841998e2025-05-30 17:10:41 +02007085 goto out;
Kevin Wolf8cd37202025-02-04 22:13:59 +01007086 }
7087
Fiona Ebner841998e2025-05-30 17:10:41 +02007088out:
7089 bdrv_graph_rdunlock_main_loop();
7090 bdrv_drain_all_end();
7091 return ret;
Kevin Wolf8cd37202025-02-04 22:13:59 +01007092}
7093
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007094int bdrv_inactivate_all(void)
7095{
Max Reitz79720af2016-03-16 19:54:44 +01007096 BlockDriverState *bs = NULL;
Kevin Wolf88be7b42016-05-20 18:49:07 +02007097 BdrvNextIterator it;
Fam Zhengaad0b7a2016-05-11 10:45:35 +08007098 int ret = 0;
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007099
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007100 GLOBAL_STATE_CODE();
Fiona Ebner841998e2025-05-30 17:10:41 +02007101
7102 bdrv_drain_all_begin();
7103 bdrv_graph_rdlock_main_loop();
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007104
Kevin Wolf88be7b42016-05-20 18:49:07 +02007105 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
Kevin Wolf9e372712018-11-23 15:11:14 +01007106 /* Nodes with BDS parents are covered by recursion from the last
7107 * parent that gets inactivated. Don't inactivate them a second
7108 * time if that has already happened. */
7109 if (bdrv_has_bds_parent(bs, false)) {
7110 continue;
7111 }
Kevin Wolfa6490ec2025-02-04 22:13:53 +01007112 ret = bdrv_inactivate_recurse(bs, true);
Kevin Wolf9e372712018-11-23 15:11:14 +01007113 if (ret < 0) {
7114 bdrv_next_cleanup(&it);
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05007115 break;
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007116 }
7117 }
7118
Fiona Ebner841998e2025-05-30 17:10:41 +02007119 bdrv_graph_rdunlock_main_loop();
7120 bdrv_drain_all_end();
7121
Fam Zhengaad0b7a2016-05-11 10:45:35 +08007122 return ret;
Kevin Wolf76b1c7f2015-12-22 14:07:08 +01007123}
7124
Kevin Wolff9f05dc2011-07-15 13:50:26 +02007125/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00007126/* removable device support */
7127
7128/**
7129 * Return TRUE if the media is present
7130 */
Emanuele Giuseppe Esposito1e97be92023-01-13 21:42:02 +01007131bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs)
bellard19cb3732006-08-19 11:45:59 +00007132{
7133 BlockDriver *drv = bs->drv;
Max Reitz28d7a782015-10-19 17:53:13 +02007134 BdrvChild *child;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007135 IO_CODE();
Emanuele Giuseppe Espositoc73ff922023-02-03 16:21:57 +01007136 assert_bdrv_graph_readable();
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02007137
Max Reitze031f752015-10-19 17:53:11 +02007138 if (!drv) {
7139 return false;
7140 }
Emanuele Giuseppe Esposito1e97be92023-01-13 21:42:02 +01007141 if (drv->bdrv_co_is_inserted) {
7142 return drv->bdrv_co_is_inserted(bs);
Max Reitze031f752015-10-19 17:53:11 +02007143 }
Max Reitz28d7a782015-10-19 17:53:13 +02007144 QLIST_FOREACH(child, &bs->children, next) {
Emanuele Giuseppe Esposito1e97be92023-01-13 21:42:02 +01007145 if (!bdrv_co_is_inserted(child->bs)) {
Max Reitz28d7a782015-10-19 17:53:13 +02007146 return false;
7147 }
7148 }
7149 return true;
bellard19cb3732006-08-19 11:45:59 +00007150}
7151
7152/**
bellard19cb3732006-08-19 11:45:59 +00007153 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
7154 */
Emanuele Giuseppe Esposito2531b392023-01-13 21:42:09 +01007155void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00007156{
7157 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007158 IO_CODE();
Kevin Wolf79a292e2023-02-03 16:21:58 +01007159 assert_bdrv_graph_readable();
bellard19cb3732006-08-19 11:45:59 +00007160
Emanuele Giuseppe Esposito2531b392023-01-13 21:42:09 +01007161 if (drv && drv->bdrv_co_eject) {
7162 drv->bdrv_co_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00007163 }
bellard19cb3732006-08-19 11:45:59 +00007164}
7165
bellard19cb3732006-08-19 11:45:59 +00007166/**
7167 * Lock or unlock the media (if it is locked, the user won't be able
7168 * to eject it manually).
7169 */
Emanuele Giuseppe Esposito2c752612023-01-13 21:42:10 +01007170void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00007171{
7172 BlockDriver *drv = bs->drv;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007173 IO_CODE();
Kevin Wolf79a292e2023-02-03 16:21:58 +01007174 assert_bdrv_graph_readable();
Markus Armbruster025e8492011-09-06 18:58:47 +02007175 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01007176
Emanuele Giuseppe Esposito2c752612023-01-13 21:42:10 +01007177 if (drv && drv->bdrv_co_lock_medium) {
7178 drv->bdrv_co_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00007179 }
7180}
ths985a03b2007-12-24 16:10:43 +00007181
Fam Zheng9fcb0252013-08-23 09:14:46 +08007182/* Get a reference to bs */
7183void bdrv_ref(BlockDriverState *bs)
7184{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007185 GLOBAL_STATE_CODE();
Fam Zheng9fcb0252013-08-23 09:14:46 +08007186 bs->refcnt++;
7187}
7188
7189/* Release a previously grabbed reference to bs.
7190 * If after releasing, reference count is zero, the BlockDriverState is
7191 * deleted. */
7192void bdrv_unref(BlockDriverState *bs)
7193{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007194 GLOBAL_STATE_CODE();
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04007195 if (!bs) {
7196 return;
7197 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08007198 assert(bs->refcnt > 0);
7199 if (--bs->refcnt == 0) {
7200 bdrv_delete(bs);
7201 }
7202}
7203
Kevin Wolf6bc0bcc2023-11-15 18:20:10 +01007204static void bdrv_schedule_unref_bh(void *opaque)
7205{
7206 BlockDriverState *bs = opaque;
Kevin Wolf6bc0bcc2023-11-15 18:20:10 +01007207
Kevin Wolf6bc0bcc2023-11-15 18:20:10 +01007208 bdrv_unref(bs);
Kevin Wolf6bc0bcc2023-11-15 18:20:10 +01007209}
7210
Kevin Wolfac2ae232023-09-11 11:46:04 +02007211/*
7212 * Release a BlockDriverState reference while holding the graph write lock.
7213 *
7214 * Calling bdrv_unref() directly is forbidden while holding the graph lock
7215 * because bdrv_close() both involves polling and taking the graph lock
7216 * internally. bdrv_schedule_unref() instead delays decreasing the refcount and
7217 * possibly closing @bs until the graph lock is released.
7218 */
7219void bdrv_schedule_unref(BlockDriverState *bs)
7220{
7221 if (!bs) {
7222 return;
7223 }
Kevin Wolf6bc0bcc2023-11-15 18:20:10 +01007224 aio_bh_schedule_oneshot(qemu_get_aio_context(), bdrv_schedule_unref_bh, bs);
Kevin Wolfac2ae232023-09-11 11:46:04 +02007225}
7226
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007227struct BdrvOpBlocker {
7228 Error *reason;
7229 QLIST_ENTRY(BdrvOpBlocker) list;
7230};
7231
7232bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
7233{
7234 BdrvOpBlocker *blocker;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007235 GLOBAL_STATE_CODE();
Kevin Wolf0bb79c92023-09-29 16:51:49 +02007236
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007237 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
7238 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
7239 blocker = QLIST_FIRST(&bs->op_blockers[op]);
Markus Armbruster4b576642018-10-17 10:26:25 +02007240 error_propagate_prepend(errp, error_copy(blocker->reason),
7241 "Node '%s' is busy: ",
7242 bdrv_get_device_or_node_name(bs));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007243 return true;
7244 }
7245 return false;
7246}
7247
7248void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
7249{
7250 BdrvOpBlocker *blocker;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007251 GLOBAL_STATE_CODE();
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007252 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
7253
Markus Armbruster5839e532014-08-19 10:31:08 +02007254 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007255 blocker->reason = reason;
7256 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
7257}
7258
7259void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
7260{
7261 BdrvOpBlocker *blocker, *next;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007262 GLOBAL_STATE_CODE();
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007263 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
7264 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
7265 if (blocker->reason == reason) {
7266 QLIST_REMOVE(blocker, list);
7267 g_free(blocker);
7268 }
7269 }
7270}
7271
7272void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
7273{
7274 int i;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007275 GLOBAL_STATE_CODE();
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007276 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
7277 bdrv_op_block(bs, i, reason);
7278 }
7279}
7280
7281void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
7282{
7283 int i;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007284 GLOBAL_STATE_CODE();
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007285 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
7286 bdrv_op_unblock(bs, i, reason);
7287 }
7288}
7289
7290bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
7291{
7292 int i;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007293 GLOBAL_STATE_CODE();
Fam Zhengfbe40ff2014-05-23 21:29:42 +08007294 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
7295 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
7296 return false;
7297 }
7298 }
7299 return true;
7300}
7301
Luiz Capitulinod92ada22012-11-30 10:52:09 -02007302void bdrv_img_create(const char *filename, const char *fmt,
7303 const char *base_filename, const char *base_fmt,
Fam Zheng92172832017-04-21 20:27:01 +08007304 char *options, uint64_t img_size, int flags, bool quiet,
7305 Error **errp)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007306{
Chunyan Liu83d05212014-06-05 17:20:51 +08007307 QemuOptsList *create_opts = NULL;
7308 QemuOpts *opts = NULL;
7309 const char *backing_fmt, *backing_file;
7310 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007311 BlockDriver *drv, *proto_drv;
Max Reitzcc84d902013-09-06 17:14:26 +02007312 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007313 int ret = 0;
7314
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007315 GLOBAL_STATE_CODE();
7316
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007317 /* Find driver and parse its options */
7318 drv = bdrv_find_format(fmt);
7319 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02007320 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02007321 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007322 }
7323
Max Reitzb65a5e12015-02-05 13:58:12 -05007324 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007325 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02007326 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007327 }
7328
Max Reitzc6149722014-12-02 18:32:45 +01007329 if (!drv->create_opts) {
7330 error_setg(errp, "Format driver '%s' does not support image creation",
7331 drv->format_name);
7332 return;
7333 }
7334
Maxim Levitsky5a5e7f82020-03-26 03:12:18 +02007335 if (!proto_drv->create_opts) {
7336 error_setg(errp, "Protocol driver '%s' does not support image creation",
7337 proto_drv->format_name);
7338 return;
7339 }
7340
Kevin Wolff6dc1c32019-11-26 16:45:49 +01007341 /* Create parameter list */
Chunyan Liuc282e1f2014-06-05 17:21:11 +08007342 create_opts = qemu_opts_append(create_opts, drv->create_opts);
Maxim Levitsky5a5e7f82020-03-26 03:12:18 +02007343 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007344
Chunyan Liu83d05212014-06-05 17:20:51 +08007345 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007346
7347 /* Parse -o options */
7348 if (options) {
Markus Armbrustera5f9b9d2020-07-07 18:06:05 +02007349 if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007350 goto out;
7351 }
7352 }
7353
Kevin Wolff6dc1c32019-11-26 16:45:49 +01007354 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
7355 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
7356 } else if (img_size != UINT64_C(-1)) {
7357 error_setg(errp, "The image size must be specified only once");
7358 goto out;
7359 }
7360
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007361 if (base_filename) {
Markus Armbruster235e59c2020-07-07 18:05:42 +02007362 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
Markus Armbruster38825782020-07-07 18:05:43 +02007363 NULL)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02007364 error_setg(errp, "Backing file not supported for file format '%s'",
7365 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007366 goto out;
7367 }
7368 }
7369
7370 if (base_fmt) {
Markus Armbruster38825782020-07-07 18:05:43 +02007371 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02007372 error_setg(errp, "Backing file format not supported for file "
7373 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007374 goto out;
7375 }
7376 }
7377
Chunyan Liu83d05212014-06-05 17:20:51 +08007378 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
7379 if (backing_file) {
7380 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02007381 error_setg(errp, "Error: Trying to create an image with the "
7382 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01007383 goto out;
7384 }
Connor Kuehl975a7bd2020-08-13 08:47:22 -05007385 if (backing_file[0] == '\0') {
7386 error_setg(errp, "Expected backing file name, got empty string");
7387 goto out;
7388 }
Jes Sorensen792da932010-12-16 13:52:17 +01007389 }
7390
Chunyan Liu83d05212014-06-05 17:20:51 +08007391 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007392
John Snow6e6e55f2017-07-17 20:34:22 -04007393 /* The size for the image must always be specified, unless we have a backing
7394 * file and we have not been forbidden from opening it. */
Eric Blakea8b42a12017-09-25 09:55:07 -05007395 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
John Snow6e6e55f2017-07-17 20:34:22 -04007396 if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
7397 BlockDriverState *bs;
Max Reitz645ae7d2019-02-01 20:29:14 +01007398 char *full_backing;
John Snow6e6e55f2017-07-17 20:34:22 -04007399 int back_flags;
7400 QDict *backing_options = NULL;
Paolo Bonzini63090da2012-04-12 14:01:03 +02007401
Max Reitz645ae7d2019-02-01 20:29:14 +01007402 full_backing =
7403 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
7404 &local_err);
John Snow6e6e55f2017-07-17 20:34:22 -04007405 if (local_err) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007406 goto out;
7407 }
Max Reitz645ae7d2019-02-01 20:29:14 +01007408 assert(full_backing);
John Snow6e6e55f2017-07-17 20:34:22 -04007409
Max Reitzd5b23992021-06-22 16:00:30 +02007410 /*
7411 * No need to do I/O here, which allows us to open encrypted
7412 * backing images without needing the secret
7413 */
John Snow6e6e55f2017-07-17 20:34:22 -04007414 back_flags = flags;
7415 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Max Reitzd5b23992021-06-22 16:00:30 +02007416 back_flags |= BDRV_O_NO_IO;
John Snow6e6e55f2017-07-17 20:34:22 -04007417
Fam Zhengcc954f02017-12-15 16:04:45 +08007418 backing_options = qdict_new();
John Snow6e6e55f2017-07-17 20:34:22 -04007419 if (backing_fmt) {
John Snow6e6e55f2017-07-17 20:34:22 -04007420 qdict_put_str(backing_options, "driver", backing_fmt);
7421 }
Fam Zhengcc954f02017-12-15 16:04:45 +08007422 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
John Snow6e6e55f2017-07-17 20:34:22 -04007423
7424 bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
7425 &local_err);
7426 g_free(full_backing);
Eric Blakeadd82002020-07-06 15:39:50 -05007427 if (!bs) {
7428 error_append_hint(&local_err, "Could not open backing image.\n");
John Snow6e6e55f2017-07-17 20:34:22 -04007429 goto out;
7430 } else {
Eric Blaked9f059a2020-07-06 15:39:54 -05007431 if (!backing_fmt) {
Eric Blake497a30d2021-05-03 14:36:00 -07007432 error_setg(&local_err,
7433 "Backing file specified without backing format");
Michael Tokarevfbdffb02023-04-05 16:34:04 +03007434 error_append_hint(&local_err, "Detected format of %s.\n",
Eric Blake497a30d2021-05-03 14:36:00 -07007435 bs->drv->format_name);
7436 goto out;
Eric Blaked9f059a2020-07-06 15:39:54 -05007437 }
John Snow6e6e55f2017-07-17 20:34:22 -04007438 if (size == -1) {
7439 /* Opened BS, have no size */
7440 size = bdrv_getlength(bs);
7441 if (size < 0) {
7442 error_setg_errno(errp, -size, "Could not get size of '%s'",
7443 backing_file);
7444 bdrv_unref(bs);
7445 goto out;
7446 }
7447 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
7448 }
7449 bdrv_unref(bs);
7450 }
Eric Blaked9f059a2020-07-06 15:39:54 -05007451 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
7452 } else if (backing_file && !backing_fmt) {
Eric Blake497a30d2021-05-03 14:36:00 -07007453 error_setg(&local_err,
7454 "Backing file specified without backing format");
7455 goto out;
Eric Blaked9f059a2020-07-06 15:39:54 -05007456 }
John Snow6e6e55f2017-07-17 20:34:22 -04007457
Hyman Huang35286da2024-01-30 13:37:23 +08007458 /* Parameter 'size' is not needed for detached LUKS header */
7459 if (size == -1 &&
7460 !(!strcmp(fmt, "luks") &&
7461 qemu_opt_get_bool(opts, "detached-header", false))) {
John Snow6e6e55f2017-07-17 20:34:22 -04007462 error_setg(errp, "Image creation needs a size parameter");
7463 goto out;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007464 }
7465
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01007466 if (!quiet) {
Kővágó, Zoltánfe646692015-07-07 16:42:10 +02007467 printf("Formatting '%s', fmt=%s ", filename, fmt);
Fam Zheng43c5d8f2014-12-09 15:38:04 +08007468 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01007469 puts("");
Eric Blake4e2f4412020-07-06 15:39:45 -05007470 fflush(stdout);
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01007471 }
Chunyan Liu83d05212014-06-05 17:20:51 +08007472
Chunyan Liuc282e1f2014-06-05 17:21:11 +08007473 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08007474
Max Reitzcc84d902013-09-06 17:14:26 +02007475 if (ret == -EFBIG) {
7476 /* This is generally a better message than whatever the driver would
7477 * deliver (especially because of the cluster_size_hint), since that
7478 * is most probably not much different from "image too large". */
7479 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08007480 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02007481 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007482 }
Max Reitzcc84d902013-09-06 17:14:26 +02007483 error_setg(errp, "The image size is too large for file format '%s'"
7484 "%s", fmt, cluster_size_hint);
7485 error_free(local_err);
7486 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007487 }
7488
7489out:
Chunyan Liu83d05212014-06-05 17:20:51 +08007490 qemu_opts_del(opts);
7491 qemu_opts_free(create_opts);
Eduardo Habkost621ff942016-06-13 18:57:56 -03007492 error_propagate(errp, local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01007493}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01007494
7495AioContext *bdrv_get_aio_context(BlockDriverState *bs)
7496{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007497 IO_CODE();
Stefan Hajnoczi33f2a752018-02-16 16:50:13 +00007498 return bs ? bs->aio_context : qemu_get_aio_context();
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007499}
7500
Kevin Wolfe336fd42020-10-05 17:58:53 +02007501AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
7502{
7503 Coroutine *self = qemu_coroutine_self();
7504 AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
7505 AioContext *new_ctx;
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007506 IO_CODE();
Kevin Wolfe336fd42020-10-05 17:58:53 +02007507
7508 /*
7509 * Increase bs->in_flight to ensure that this operation is completed before
7510 * moving the node to a different AioContext. Read new_ctx only afterwards.
7511 */
7512 bdrv_inc_in_flight(bs);
7513
7514 new_ctx = bdrv_get_aio_context(bs);
7515 aio_co_reschedule_self(new_ctx);
7516 return old_ctx;
7517}
7518
7519void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
7520{
Emanuele Giuseppe Esposito384a48f2022-03-03 10:15:50 -05007521 IO_CODE();
Kevin Wolfe336fd42020-10-05 17:58:53 +02007522 aio_co_reschedule_self(old_ctx);
7523 bdrv_dec_in_flight(bs);
7524}
7525
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007526static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
7527{
Emanuele Giuseppe Espositobdb73472022-03-03 10:16:02 -05007528 GLOBAL_STATE_CODE();
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007529 QLIST_REMOVE(ban, list);
7530 g_free(ban);
7531}
7532
Kevin Wolfa3a683c2019-05-06 19:17:57 +02007533static void bdrv_detach_aio_context(BlockDriverState *bs)
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007534{
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007535 BdrvAioNotifier *baf, *baf_tmp;
Max Reitz33384422014-06-20 21:57:33 +02007536
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007537 assert(!bs->walking_aio_notifiers);
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05007538 GLOBAL_STATE_CODE();
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007539 bs->walking_aio_notifiers = true;
7540 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
7541 if (baf->deleted) {
7542 bdrv_do_remove_aio_context_notifier(baf);
7543 } else {
7544 baf->detach_aio_context(baf->opaque);
7545 }
Max Reitz33384422014-06-20 21:57:33 +02007546 }
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007547 /* Never mind iterating again to check for ->deleted. bdrv_close() will
7548 * remove remaining aio notifiers if we aren't called again.
7549 */
7550 bs->walking_aio_notifiers = false;
Max Reitz33384422014-06-20 21:57:33 +02007551
Kevin Wolf1bffe1a2019-04-17 17:15:25 +02007552 if (bs->drv && bs->drv->bdrv_detach_aio_context) {
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007553 bs->drv->bdrv_detach_aio_context(bs);
7554 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007555
7556 bs->aio_context = NULL;
7557}
7558
Kevin Wolfa3a683c2019-05-06 19:17:57 +02007559static void bdrv_attach_aio_context(BlockDriverState *bs,
7560 AioContext *new_context)
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007561{
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007562 BdrvAioNotifier *ban, *ban_tmp;
Emanuele Giuseppe Espositoda359902022-03-03 10:16:11 -05007563 GLOBAL_STATE_CODE();
Max Reitz33384422014-06-20 21:57:33 +02007564
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007565 bs->aio_context = new_context;
7566
Kevin Wolf1bffe1a2019-04-17 17:15:25 +02007567 if (bs->drv && bs->drv->bdrv_attach_aio_context) {
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007568 bs->drv->bdrv_attach_aio_context(bs, new_context);
7569 }
Max Reitz33384422014-06-20 21:57:33 +02007570
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007571 assert(!bs->walking_aio_notifiers);
7572 bs->walking_aio_notifiers = true;
7573 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
7574 if (ban->deleted) {
7575 bdrv_do_remove_aio_context_notifier(ban);
7576 } else {
7577 ban->attached_aio_context(new_context, ban->opaque);
7578 }
Max Reitz33384422014-06-20 21:57:33 +02007579 }
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007580 bs->walking_aio_notifiers = false;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02007581}
7582
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007583typedef struct BdrvStateSetAioContext {
7584 AioContext *new_ctx;
7585 BlockDriverState *bs;
7586} BdrvStateSetAioContext;
7587
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007588/*
7589 * Changes the AioContext of @child to @ctx and recursively for the associated
7590 * block nodes and all their children and parents. Returns true if the change is
7591 * possible and the transaction @tran can be continued. Returns false and sets
7592 * @errp if not and the transaction must be aborted.
7593 *
7594 * @visited will accumulate all visited BdrvChild objects. The caller is
7595 * responsible for freeing the list afterwards.
7596 *
7597 * Must be called with the affected block nodes drained.
7598 */
Fiona Ebner37587332025-05-30 17:10:42 +02007599static bool GRAPH_RDLOCK
7600bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
7601 GHashTable *visited, Transaction *tran,
7602 Error **errp)
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007603{
7604 GLOBAL_STATE_CODE();
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007605 if (g_hash_table_contains(visited, c)) {
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007606 return true;
7607 }
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007608 g_hash_table_add(visited, c);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007609
7610 /*
7611 * A BdrvChildClass that doesn't handle AioContext changes cannot
7612 * tolerate any AioContext changes
7613 */
7614 if (!c->klass->change_aio_ctx) {
7615 char *user = bdrv_child_user_desc(c);
7616 error_setg(errp, "Changing iothreads is not supported by %s", user);
7617 g_free(user);
7618 return false;
7619 }
7620 if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) {
7621 assert(!errp || *errp);
7622 return false;
7623 }
7624 return true;
7625}
7626
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007627/*
7628 * Changes the AioContext of @c->bs to @ctx and recursively for all its children
7629 * and parents. Returns true if the change is possible and the transaction @tran
7630 * can be continued. Returns false and sets @errp if not and the transaction
7631 * must be aborted.
7632 *
7633 * @visited will accumulate all visited BdrvChild objects. The caller is
7634 * responsible for freeing the list afterwards.
7635 *
7636 * Must be called with the affected block nodes drained.
7637 */
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007638bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007639 GHashTable *visited, Transaction *tran,
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007640 Error **errp)
7641{
7642 GLOBAL_STATE_CODE();
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007643 if (g_hash_table_contains(visited, c)) {
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007644 return true;
7645 }
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007646 g_hash_table_add(visited, c);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007647 return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp);
7648}
7649
/*
 * .clean callback of the set_aio_context action: release the
 * BdrvStateSetAioContext passed as @opaque.
 */
static void bdrv_set_aio_context_clean(void *opaque)
{
    g_free(opaque);
}
7656
7657static void bdrv_set_aio_context_commit(void *opaque)
7658{
7659 BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
7660 BlockDriverState *bs = (BlockDriverState *) state->bs;
7661 AioContext *new_context = state->new_ctx;
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007662
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007663 bdrv_detach_aio_context(bs);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007664 bdrv_attach_aio_context(bs, new_context);
7665}
7666
7667static TransactionActionDrv set_aio_context = {
7668 .commit = bdrv_set_aio_context_commit,
7669 .clean = bdrv_set_aio_context_clean,
7670};
7671
Kevin Wolf42a65f02019-05-07 18:31:38 +02007672/*
7673 * Changes the AioContext used for fd handlers, timers, and BHs by this
7674 * BlockDriverState and all its children and parents.
7675 *
Max Reitz43eaaae2019-07-22 15:30:54 +02007676 * Must be called from the main AioContext.
7677 *
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007678 * @visited will accumulate all visited BdrvChild objects. The caller is
Kevin Wolf42a65f02019-05-07 18:31:38 +02007679 * responsible for freeing the list afterwards.
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007680 *
7681 * @bs must be drained.
Kevin Wolf42a65f02019-05-07 18:31:38 +02007682 */
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007683static bool GRAPH_RDLOCK
7684bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
7685 GHashTable *visited, Transaction *tran, Error **errp)
Kevin Wolf5d231842019-05-06 19:17:56 +02007686{
7687 BdrvChild *c;
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007688 BdrvStateSetAioContext *state;
7689
7690 GLOBAL_STATE_CODE();
Kevin Wolf5d231842019-05-06 19:17:56 +02007691
7692 if (bdrv_get_aio_context(bs) == ctx) {
7693 return true;
7694 }
7695
7696 QLIST_FOREACH(c, &bs->parents, next_parent) {
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007697 if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
Kevin Wolf5d231842019-05-06 19:17:56 +02007698 return false;
7699 }
7700 }
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007701
Kevin Wolf5d231842019-05-06 19:17:56 +02007702 QLIST_FOREACH(c, &bs->children, next) {
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007703 if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
Kevin Wolf5d231842019-05-06 19:17:56 +02007704 return false;
7705 }
7706 }
7707
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007708 state = g_new(BdrvStateSetAioContext, 1);
7709 *state = (BdrvStateSetAioContext) {
7710 .new_ctx = ctx,
7711 .bs = bs,
7712 };
7713
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007714 assert(bs->quiesce_counter > 0);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007715
7716 tran_add(tran, &set_aio_context, state);
7717
Kevin Wolf5d231842019-05-06 19:17:56 +02007718 return true;
7719}
7720
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007721/*
7722 * Change bs's and recursively all of its parents' and children's AioContext
7723 * to the given new context, returning an error if that isn't possible.
7724 *
7725 * If ignore_child is not NULL, that child (and its subgraph) will not
7726 * be touched.
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02007727 *
7728 * Called with the graph lock held.
7729 *
7730 * Called while all bs are drained.
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007731 */
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02007732int bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx,
7733 BdrvChild *ignore_child, Error **errp)
Kevin Wolf5d231842019-05-06 19:17:56 +02007734{
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007735 Transaction *tran;
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007736 GHashTable *visited;
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007737 int ret;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007738 GLOBAL_STATE_CODE();
7739
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007740 /*
7741 * Recursion phase: go through all nodes of the graph.
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02007742 * Take care of checking that all nodes support changing AioContext,
7743 * building a linear list of callbacks to run if everything is successful
7744 * (the transaction itself).
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007745 */
7746 tran = tran_new();
Emanuele Giuseppe Espositoe08cc002022-10-25 04:49:45 -04007747 visited = g_hash_table_new(NULL, NULL);
7748 if (ignore_child) {
7749 g_hash_table_add(visited, ignore_child);
7750 }
7751 ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp);
7752 g_hash_table_destroy(visited);
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007753
7754 /*
7755 * Linear phase: go through all callbacks collected in the transaction.
Stefan Hajnoczi23c983c2023-12-05 13:20:11 -05007756 * Run all callbacks collected in the recursion to switch every node's
7757 * AioContext (transaction commit), or undo all changes done in the
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007758 * recursion (transaction abort).
7759 */
Kevin Wolf5d231842019-05-06 19:17:56 +02007760
7761 if (!ret) {
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007762 /* Just run clean() callbacks. No AioContext changed. */
7763 tran_abort(tran);
Kevin Wolf5d231842019-05-06 19:17:56 +02007764 return -EPERM;
7765 }
7766
Emanuele Giuseppe Esposito7e8c1822022-10-25 04:49:44 -04007767 tran_commit(tran);
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02007768 return 0;
7769}
7770
7771/*
7772 * Change bs's and recursively all of its parents' and children's AioContext
7773 * to the given new context, returning an error if that isn't possible.
7774 *
7775 * If ignore_child is not NULL, that child (and its subgraph) will not
7776 * be touched.
7777 */
7778int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
7779 BdrvChild *ignore_child, Error **errp)
7780{
7781 int ret;
7782
7783 GLOBAL_STATE_CODE();
7784
7785 bdrv_drain_all_begin();
7786 bdrv_graph_rdlock_main_loop();
7787 ret = bdrv_try_change_aio_context_locked(bs, ctx, ignore_child, errp);
Fiona Ebner91ba0e12025-05-30 17:10:45 +02007788 bdrv_graph_rdunlock_main_loop();
7789 bdrv_drain_all_end();
Fiona Ebnera1ea8eb2025-05-30 17:10:46 +02007790
7791 return ret;
Kevin Wolf5d231842019-05-06 19:17:56 +02007792}
7793
Max Reitz33384422014-06-20 21:57:33 +02007794void bdrv_add_aio_context_notifier(BlockDriverState *bs,
7795 void (*attached_aio_context)(AioContext *new_context, void *opaque),
7796 void (*detach_aio_context)(void *opaque), void *opaque)
7797{
7798 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
7799 *ban = (BdrvAioNotifier){
7800 .attached_aio_context = attached_aio_context,
7801 .detach_aio_context = detach_aio_context,
7802 .opaque = opaque
7803 };
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007804 GLOBAL_STATE_CODE();
Max Reitz33384422014-06-20 21:57:33 +02007805
7806 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
7807}
7808
7809void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
7810 void (*attached_aio_context)(AioContext *,
7811 void *),
7812 void (*detach_aio_context)(void *),
7813 void *opaque)
7814{
7815 BdrvAioNotifier *ban, *ban_next;
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007816 GLOBAL_STATE_CODE();
Max Reitz33384422014-06-20 21:57:33 +02007817
7818 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
7819 if (ban->attached_aio_context == attached_aio_context &&
7820 ban->detach_aio_context == detach_aio_context &&
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007821 ban->opaque == opaque &&
7822 ban->deleted == false)
Max Reitz33384422014-06-20 21:57:33 +02007823 {
Stefan Hajnoczie8a095d2016-06-16 17:56:26 +01007824 if (bs->walking_aio_notifiers) {
7825 ban->deleted = true;
7826 } else {
7827 bdrv_do_remove_aio_context_notifier(ban);
7828 }
Max Reitz33384422014-06-20 21:57:33 +02007829 return;
7830 }
7831 }
7832
7833 abort();
7834}
7835
Max Reitz77485432014-10-27 11:12:50 +01007836int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
Max Reitzd1402b52018-05-09 23:00:18 +02007837 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
Maxim Levitskya3579bf2020-06-25 14:55:38 +02007838 bool force,
Max Reitzd1402b52018-05-09 23:00:18 +02007839 Error **errp)
Max Reitz6f176b42013-09-03 10:09:50 +02007840{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007841 GLOBAL_STATE_CODE();
Max Reitzd470ad42017-11-10 21:31:09 +01007842 if (!bs->drv) {
Max Reitzd1402b52018-05-09 23:00:18 +02007843 error_setg(errp, "Node is ejected");
Max Reitzd470ad42017-11-10 21:31:09 +01007844 return -ENOMEDIUM;
7845 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08007846 if (!bs->drv->bdrv_amend_options) {
Max Reitzd1402b52018-05-09 23:00:18 +02007847 error_setg(errp, "Block driver '%s' does not support option amendment",
7848 bs->drv->format_name);
Max Reitz6f176b42013-09-03 10:09:50 +02007849 return -ENOTSUP;
7850 }
Maxim Levitskya3579bf2020-06-25 14:55:38 +02007851 return bs->drv->bdrv_amend_options(bs, opts, status_cb,
7852 cb_opaque, force, errp);
Max Reitz6f176b42013-09-03 10:09:50 +02007853}
Benoît Canetf6186f42013-10-02 14:33:48 +02007854
Max Reitz5d69b5a2020-02-18 11:34:41 +01007855/*
7856 * This function checks whether the given @to_replace is allowed to be
7857 * replaced by a node that always shows the same data as @bs. This is
7858 * used for example to verify whether the mirror job can replace
7859 * @to_replace by the target mirrored from @bs.
7860 * To be replaceable, @bs and @to_replace may either be guaranteed to
7861 * always show the same data (because they are only connected through
7862 * filters), or some driver may allow replacing one of its children
7863 * because it can guarantee that this child's data is not visible at
7864 * all (for example, for dissenting quorum children that have no other
7865 * parents).
7866 */
7867bool bdrv_recurse_can_replace(BlockDriverState *bs,
7868 BlockDriverState *to_replace)
7869{
Max Reitz93393e62019-06-12 17:03:38 +02007870 BlockDriverState *filtered;
7871
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05007872 GLOBAL_STATE_CODE();
7873
Max Reitz5d69b5a2020-02-18 11:34:41 +01007874 if (!bs || !bs->drv) {
7875 return false;
7876 }
7877
7878 if (bs == to_replace) {
7879 return true;
7880 }
7881
7882 /* See what the driver can do */
7883 if (bs->drv->bdrv_recurse_can_replace) {
7884 return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
7885 }
7886
7887 /* For filters without an own implementation, we can recurse on our own */
Max Reitz93393e62019-06-12 17:03:38 +02007888 filtered = bdrv_filter_bs(bs);
7889 if (filtered) {
7890 return bdrv_recurse_can_replace(filtered, to_replace);
Max Reitz5d69b5a2020-02-18 11:34:41 +01007891 }
7892
7893 /* Safe default */
7894 return false;
7895}
7896
Max Reitz810803a2020-02-18 11:34:44 +01007897/*
7898 * Check whether the given @node_name can be replaced by a node that
7899 * has the same data as @parent_bs. If so, return @node_name's BDS;
7900 * NULL otherwise.
7901 *
7902 * @node_name must be a (recursive) *child of @parent_bs (or this
7903 * function will return NULL).
7904 *
7905 * The result (whether the node can be replaced or not) is only valid
7906 * for as long as no graph or permission changes occur.
7907 */
Wen Congyange12f3782015-07-17 10:12:22 +08007908BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
7909 const char *node_name, Error **errp)
Benoît Canet09158f02014-06-27 18:25:25 +02007910{
7911 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01007912
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05007913 GLOBAL_STATE_CODE();
7914
Benoît Canet09158f02014-06-27 18:25:25 +02007915 if (!to_replace_bs) {
Connor Kuehl785ec4b2021-03-05 09:19:28 -06007916 error_setg(errp, "Failed to find node with node-name='%s'", node_name);
Benoît Canet09158f02014-06-27 18:25:25 +02007917 return NULL;
7918 }
7919
7920 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05007921 return NULL;
Benoît Canet09158f02014-06-27 18:25:25 +02007922 }
7923
7924 /* We don't want arbitrary node of the BDS chain to be replaced only the top
7925 * most non filter in order to prevent data corruption.
7926 * Another benefit is that this tests exclude backing files which are
7927 * blocked by the backing blockers.
7928 */
Max Reitz810803a2020-02-18 11:34:44 +01007929 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
7930 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
7931 "because it cannot be guaranteed that doing so would not "
7932 "lead to an abrupt change of visible data",
7933 node_name, parent_bs->node_name);
Stefan Hajnoczib49f4752023-12-05 13:20:03 -05007934 return NULL;
Benoît Canet09158f02014-06-27 18:25:25 +02007935 }
7936
7937 return to_replace_bs;
7938}
Ming Lei448ad912014-07-04 18:04:33 +08007939
Max Reitz97e2f022019-02-01 20:29:27 +01007940/**
7941 * Iterates through the list of runtime option keys that are said to
7942 * be "strong" for a BDS. An option is called "strong" if it changes
7943 * a BDS's data. For example, the null block driver's "size" and
7944 * "read-zeroes" options are strong, but its "latency-ns" option is
7945 * not.
7946 *
7947 * If a key returned by this function ends with a dot, all options
7948 * starting with that prefix are strong.
7949 */
7950static const char *const *strong_options(BlockDriverState *bs,
7951 const char *const *curopt)
7952{
7953 static const char *const global_options[] = {
7954 "driver", "filename", NULL
7955 };
7956
7957 if (!curopt) {
7958 return &global_options[0];
7959 }
7960
7961 curopt++;
7962 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
7963 curopt = bs->drv->strong_runtime_opts;
7964 }
7965
7966 return (curopt && *curopt) ? curopt : NULL;
7967}
7968
7969/**
7970 * Copies all strong runtime options from bs->options to the given
7971 * QDict. The set of strong option keys is determined by invoking
7972 * strong_options().
7973 *
7974 * Returns true iff any strong option was present in bs->options (and
7975 * thus copied to the target QDict) with the exception of "filename"
7976 * and "driver". The caller is expected to use this value to decide
7977 * whether the existence of strong options prevents the generation of
7978 * a plain filename.
7979 */
7980static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
7981{
7982 bool found_any = false;
7983 const char *const *option_name = NULL;
7984
7985 if (!bs->drv) {
7986 return false;
7987 }
7988
7989 while ((option_name = strong_options(bs, option_name))) {
7990 bool option_given = false;
7991
7992 assert(strlen(*option_name) > 0);
7993 if ((*option_name)[strlen(*option_name) - 1] != '.') {
7994 QObject *entry = qdict_get(bs->options, *option_name);
7995 if (!entry) {
7996 continue;
7997 }
7998
7999 qdict_put_obj(d, *option_name, qobject_ref(entry));
8000 option_given = true;
8001 } else {
8002 const QDictEntry *entry;
8003 for (entry = qdict_first(bs->options); entry;
8004 entry = qdict_next(bs->options, entry))
8005 {
8006 if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
8007 qdict_put_obj(d, qdict_entry_key(entry),
8008 qobject_ref(qdict_entry_value(entry)));
8009 option_given = true;
8010 }
8011 }
8012 }
8013
8014 /* While "driver" and "filename" need to be included in a JSON filename,
8015 * their existence does not prohibit generation of a plain filename. */
8016 if (!found_any && option_given &&
8017 strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
8018 {
8019 found_any = true;
8020 }
8021 }
8022
Max Reitz62a01a272019-02-01 20:29:34 +01008023 if (!qdict_haskey(d, "driver")) {
8024 /* Drivers created with bdrv_new_open_driver() may not have a
8025 * @driver option. Add it here. */
8026 qdict_put_str(d, "driver", bs->drv->format_name);
8027 }
8028
Max Reitz97e2f022019-02-01 20:29:27 +01008029 return found_any;
8030}
8031
Max Reitz90993622019-02-01 20:29:09 +01008032/* Note: This function may return false positives; it may return true
8033 * even if opening the backing file specified by bs's image header
8034 * would result in exactly bs->backing. */
Kevin Wolf004915a2023-10-27 17:53:26 +02008035static bool GRAPH_RDLOCK bdrv_backing_overridden(BlockDriverState *bs)
Max Reitz90993622019-02-01 20:29:09 +01008036{
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05008037 GLOBAL_STATE_CODE();
Max Reitz90993622019-02-01 20:29:09 +01008038 if (bs->backing) {
8039 return strcmp(bs->auto_backing_file,
8040 bs->backing->bs->filename);
8041 } else {
8042 /* No backing BDS, so if the image header reports any backing
8043 * file, it must have been suppressed */
8044 return bs->auto_backing_file[0] != '\0';
8045 }
8046}
8047
Max Reitz91af7012014-07-18 20:24:56 +02008048/* Updates the following BDS fields:
8049 * - exact_filename: A filename which may be used for opening a block device
8050 * which (mostly) equals the given BDS (even without any
8051 * other options; so reading and writing must return the same
8052 * results, but caching etc. may be different)
8053 * - full_open_options: Options which, when given when opening a block device
8054 * (without a filename), result in a BDS (mostly)
8055 * equalling the given one
8056 * - filename: If exact_filename is set, it is copied here. Otherwise,
8057 * full_open_options is converted to a JSON object, prefixed with
8058 * "json:" (for use through the JSON pseudo protocol) and put here.
8059 */
8060void bdrv_refresh_filename(BlockDriverState *bs)
8061{
8062 BlockDriver *drv = bs->drv;
Max Reitze24518e2019-02-01 20:29:06 +01008063 BdrvChild *child;
Max Reitz52f72d62019-06-12 17:43:03 +02008064 BlockDriverState *primary_child_bs;
Max Reitz91af7012014-07-18 20:24:56 +02008065 QDict *opts;
Max Reitz90993622019-02-01 20:29:09 +01008066 bool backing_overridden;
Max Reitz998b3a12019-02-01 20:29:28 +01008067 bool generate_json_filename; /* Whether our default implementation should
8068 fill exact_filename (false) or not (true) */
Max Reitz91af7012014-07-18 20:24:56 +02008069
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05008070 GLOBAL_STATE_CODE();
8071
Max Reitz91af7012014-07-18 20:24:56 +02008072 if (!drv) {
8073 return;
8074 }
8075
Max Reitze24518e2019-02-01 20:29:06 +01008076 /* This BDS's file name may depend on any of its children's file names, so
8077 * refresh those first */
8078 QLIST_FOREACH(child, &bs->children, next) {
8079 bdrv_refresh_filename(child->bs);
Max Reitz91af7012014-07-18 20:24:56 +02008080 }
8081
Max Reitzbb808d52019-02-01 20:29:07 +01008082 if (bs->implicit) {
8083 /* For implicit nodes, just copy everything from the single child */
8084 child = QLIST_FIRST(&bs->children);
8085 assert(QLIST_NEXT(child, next) == NULL);
8086
8087 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
8088 child->bs->exact_filename);
8089 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
8090
Pan Nengyuancb895612020-01-16 16:56:00 +08008091 qobject_unref(bs->full_open_options);
Max Reitzbb808d52019-02-01 20:29:07 +01008092 bs->full_open_options = qobject_ref(child->bs->full_open_options);
8093
8094 return;
8095 }
8096
Max Reitz90993622019-02-01 20:29:09 +01008097 backing_overridden = bdrv_backing_overridden(bs);
8098
8099 if (bs->open_flags & BDRV_O_NO_IO) {
8100 /* Without I/O, the backing file does not change anything.
8101 * Therefore, in such a case (primarily qemu-img), we can
8102 * pretend the backing file has not been overridden even if
8103 * it technically has been. */
8104 backing_overridden = false;
8105 }
8106
Max Reitz97e2f022019-02-01 20:29:27 +01008107 /* Gather the options QDict */
8108 opts = qdict_new();
Max Reitz998b3a12019-02-01 20:29:28 +01008109 generate_json_filename = append_strong_runtime_options(opts, bs);
8110 generate_json_filename |= backing_overridden;
Max Reitz97e2f022019-02-01 20:29:27 +01008111
8112 if (drv->bdrv_gather_child_options) {
8113 /* Some block drivers may not want to present all of their children's
8114 * options, or name them differently from BdrvChild.name */
8115 drv->bdrv_gather_child_options(bs, opts, backing_overridden);
8116 } else {
8117 QLIST_FOREACH(child, &bs->children, next) {
Max Reitz25191e52020-05-13 13:05:33 +02008118 if (child == bs->backing && !backing_overridden) {
Max Reitz97e2f022019-02-01 20:29:27 +01008119 /* We can skip the backing BDS if it has not been overridden */
8120 continue;
8121 }
8122
8123 qdict_put(opts, child->name,
8124 qobject_ref(child->bs->full_open_options));
8125 }
8126
8127 if (backing_overridden && !bs->backing) {
8128 /* Force no backing file */
8129 qdict_put_null(opts, "backing");
8130 }
8131 }
8132
8133 qobject_unref(bs->full_open_options);
8134 bs->full_open_options = opts;
8135
Max Reitz52f72d62019-06-12 17:43:03 +02008136 primary_child_bs = bdrv_primary_bs(bs);
8137
Max Reitz998b3a12019-02-01 20:29:28 +01008138 if (drv->bdrv_refresh_filename) {
8139 /* Obsolete information is of no use here, so drop the old file name
8140 * information before refreshing it */
8141 bs->exact_filename[0] = '\0';
8142
8143 drv->bdrv_refresh_filename(bs);
Max Reitz52f72d62019-06-12 17:43:03 +02008144 } else if (primary_child_bs) {
8145 /*
8146 * Try to reconstruct valid information from the underlying
8147 * file -- this only works for format nodes (filter nodes
8148 * cannot be probed and as such must be selected by the user
8149 * either through an options dict, or through a special
8150 * filename which the filter driver must construct in its
8151 * .bdrv_refresh_filename() implementation).
8152 */
Max Reitz998b3a12019-02-01 20:29:28 +01008153
8154 bs->exact_filename[0] = '\0';
8155
Max Reitzfb695c72019-02-01 20:29:29 +01008156 /*
8157 * We can use the underlying file's filename if:
8158 * - it has a filename,
Max Reitz52f72d62019-06-12 17:43:03 +02008159 * - the current BDS is not a filter,
Max Reitzfb695c72019-02-01 20:29:29 +01008160 * - the file is a protocol BDS, and
8161 * - opening that file (as this BDS's format) will automatically create
8162 * the BDS tree we have right now, that is:
8163 * - the user did not significantly change this BDS's behavior with
8164 * some explicit (strong) options
8165 * - no non-file child of this BDS has been overridden by the user
8166 * Both of these conditions are represented by generate_json_filename.
8167 */
Max Reitz52f72d62019-06-12 17:43:03 +02008168 if (primary_child_bs->exact_filename[0] &&
Paolo Bonzini41770f62022-11-24 16:21:18 +01008169 primary_child_bs->drv->protocol_name &&
Max Reitz52f72d62019-06-12 17:43:03 +02008170 !drv->is_filter && !generate_json_filename)
Max Reitzfb695c72019-02-01 20:29:29 +01008171 {
Max Reitz52f72d62019-06-12 17:43:03 +02008172 strcpy(bs->exact_filename, primary_child_bs->exact_filename);
Max Reitz998b3a12019-02-01 20:29:28 +01008173 }
8174 }
8175
Max Reitz91af7012014-07-18 20:24:56 +02008176 if (bs->exact_filename[0]) {
8177 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
Max Reitz97e2f022019-02-01 20:29:27 +01008178 } else {
Markus Armbrustereab3a462020-12-11 18:11:37 +01008179 GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
Eric Blake5c86bdf2020-06-08 13:26:38 -05008180 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
Markus Armbrustereab3a462020-12-11 18:11:37 +01008181 json->str) >= sizeof(bs->filename)) {
Eric Blake5c86bdf2020-06-08 13:26:38 -05008182 /* Give user a hint if we truncated things. */
8183 strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
8184 }
Markus Armbrustereab3a462020-12-11 18:11:37 +01008185 g_string_free(json, true);
Max Reitz91af7012014-07-18 20:24:56 +02008186 }
8187}
Wen Congyange06018a2016-05-10 15:36:37 +08008188
Max Reitz1e89d0f2019-02-01 20:29:18 +01008189char *bdrv_dirname(BlockDriverState *bs, Error **errp)
8190{
8191 BlockDriver *drv = bs->drv;
Max Reitz52f72d62019-06-12 17:43:03 +02008192 BlockDriverState *child_bs;
Max Reitz1e89d0f2019-02-01 20:29:18 +01008193
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05008194 GLOBAL_STATE_CODE();
8195
Max Reitz1e89d0f2019-02-01 20:29:18 +01008196 if (!drv) {
8197 error_setg(errp, "Node '%s' is ejected", bs->node_name);
8198 return NULL;
8199 }
8200
8201 if (drv->bdrv_dirname) {
8202 return drv->bdrv_dirname(bs, errp);
8203 }
8204
Max Reitz52f72d62019-06-12 17:43:03 +02008205 child_bs = bdrv_primary_bs(bs);
8206 if (child_bs) {
8207 return bdrv_dirname(child_bs, errp);
Max Reitz1e89d0f2019-02-01 20:29:18 +01008208 }
8209
8210 bdrv_refresh_filename(bs);
8211 if (bs->exact_filename[0] != '\0') {
8212 return path_combine(bs->exact_filename, "");
8213 }
8214
8215 error_setg(errp, "Cannot generate a base directory for %s nodes",
8216 drv->format_name);
8217 return NULL;
8218}
8219
Wen Congyange06018a2016-05-10 15:36:37 +08008220/*
8221 * Hot add/remove a BDS's child. So the user can take a child offline when
8222 * it is broken and take a new child online
8223 */
8224void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
8225 Error **errp)
8226{
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05008227 GLOBAL_STATE_CODE();
Wen Congyange06018a2016-05-10 15:36:37 +08008228 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
8229 error_setg(errp, "The node %s does not support adding a child",
8230 bdrv_get_device_or_node_name(parent_bs));
8231 return;
8232 }
8233
Sam Li774c7262023-05-08 12:55:30 +08008234 /*
8235 * Non-zoned block drivers do not follow zoned storage constraints
8236 * (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
8237 * drivers in a graph.
8238 */
8239 if (!parent_bs->drv->supports_zoned_children &&
8240 child_bs->bl.zoned == BLK_Z_HM) {
8241 /*
8242 * The host-aware model allows zoned storage constraints and random
8243 * write. Allow mixing host-aware and non-zoned drivers. Using
8244 * host-aware device as a regular device.
8245 */
8246 error_setg(errp, "Cannot add a %s child to a %s parent",
8247 child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
8248 parent_bs->drv->supports_zoned_children ?
8249 "support zoned children" : "not support zoned children");
8250 return;
8251 }
8252
Wen Congyange06018a2016-05-10 15:36:37 +08008253 if (!QLIST_EMPTY(&child_bs->parents)) {
8254 error_setg(errp, "The node %s already has a parent",
8255 child_bs->node_name);
8256 return;
8257 }
8258
8259 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
8260}
8261
8262void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
8263{
8264 BdrvChild *tmp;
8265
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05008266 GLOBAL_STATE_CODE();
Wen Congyange06018a2016-05-10 15:36:37 +08008267 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
8268 error_setg(errp, "The node %s does not support removing a child",
8269 bdrv_get_device_or_node_name(parent_bs));
8270 return;
8271 }
8272
8273 QLIST_FOREACH(tmp, &parent_bs->children, next) {
8274 if (tmp == child) {
8275 break;
8276 }
8277 }
8278
8279 if (!tmp) {
8280 error_setg(errp, "The node %s does not have a child named %s",
8281 bdrv_get_device_or_node_name(parent_bs),
8282 bdrv_get_device_or_node_name(child->bs));
8283 return;
8284 }
8285
8286 parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
8287}
Max Reitz6f7a3b52020-04-29 16:11:23 +02008288
8289int bdrv_make_empty(BdrvChild *c, Error **errp)
8290{
8291 BlockDriver *drv = c->bs->drv;
8292 int ret;
8293
Emanuele Giuseppe Espositof791bf72022-03-03 10:15:49 -05008294 GLOBAL_STATE_CODE();
Max Reitz6f7a3b52020-04-29 16:11:23 +02008295 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
8296
8297 if (!drv->bdrv_make_empty) {
8298 error_setg(errp, "%s does not support emptying nodes",
8299 drv->format_name);
8300 return -ENOTSUP;
8301 }
8302
8303 ret = drv->bdrv_make_empty(c->bs);
8304 if (ret < 0) {
8305 error_setg_errno(errp, -ret, "Failed to empty %s",
8306 c->bs->filename);
8307 return ret;
8308 }
8309
8310 return 0;
8311}
Max Reitz9a6fc882019-05-31 15:23:11 +02008312
8313/*
8314 * Return the child that @bs acts as an overlay for, and from which data may be
8315 * copied in COW or COR operations. Usually this is the backing file.
8316 */
8317BdrvChild *bdrv_cow_child(BlockDriverState *bs)
8318{
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008319 IO_CODE();
8320
Max Reitz9a6fc882019-05-31 15:23:11 +02008321 if (!bs || !bs->drv) {
8322 return NULL;
8323 }
8324
8325 if (bs->drv->is_filter) {
8326 return NULL;
8327 }
8328
8329 if (!bs->backing) {
8330 return NULL;
8331 }
8332
8333 assert(bs->backing->role & BDRV_CHILD_COW);
8334 return bs->backing;
8335}
8336
8337/*
8338 * If @bs acts as a filter for exactly one of its children, return
8339 * that child.
8340 */
8341BdrvChild *bdrv_filter_child(BlockDriverState *bs)
8342{
8343 BdrvChild *c;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008344 IO_CODE();
Max Reitz9a6fc882019-05-31 15:23:11 +02008345
8346 if (!bs || !bs->drv) {
8347 return NULL;
8348 }
8349
8350 if (!bs->drv->is_filter) {
8351 return NULL;
8352 }
8353
8354 /* Only one of @backing or @file may be used */
8355 assert(!(bs->backing && bs->file));
8356
8357 c = bs->backing ?: bs->file;
8358 if (!c) {
8359 return NULL;
8360 }
8361
8362 assert(c->role & BDRV_CHILD_FILTERED);
8363 return c;
8364}
8365
8366/*
8367 * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
8368 * whichever is non-NULL.
8369 *
8370 * Return NULL if both are NULL.
8371 */
8372BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
8373{
8374 BdrvChild *cow_child = bdrv_cow_child(bs);
8375 BdrvChild *filter_child = bdrv_filter_child(bs);
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008376 IO_CODE();
Max Reitz9a6fc882019-05-31 15:23:11 +02008377
8378 /* Filter nodes cannot have COW backing files */
8379 assert(!(cow_child && filter_child));
8380
8381 return cow_child ?: filter_child;
8382}
8383
8384/*
8385 * Return the primary child of this node: For filters, that is the
8386 * filtered child. For other nodes, that is usually the child storing
8387 * metadata.
8388 * (A generally more helpful description is that this is (usually) the
8389 * child that has the same filename as @bs.)
8390 *
8391 * Drivers do not necessarily have a primary child; for example quorum
8392 * does not.
8393 */
8394BdrvChild *bdrv_primary_child(BlockDriverState *bs)
8395{
8396 BdrvChild *c, *found = NULL;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008397 IO_CODE();
Max Reitz9a6fc882019-05-31 15:23:11 +02008398
8399 QLIST_FOREACH(c, &bs->children, next) {
8400 if (c->role & BDRV_CHILD_PRIMARY) {
8401 assert(!found);
8402 found = c;
8403 }
8404 }
8405
8406 return found;
8407}
Max Reitzd38d7eb2019-06-12 15:06:37 +02008408
Kevin Wolfec82cc42023-10-27 17:53:20 +02008409static BlockDriverState * GRAPH_RDLOCK
8410bdrv_do_skip_filters(BlockDriverState *bs, bool stop_on_explicit_filter)
Max Reitzd38d7eb2019-06-12 15:06:37 +02008411{
8412 BdrvChild *c;
8413
8414 if (!bs) {
8415 return NULL;
8416 }
8417
8418 while (!(stop_on_explicit_filter && !bs->implicit)) {
8419 c = bdrv_filter_child(bs);
8420 if (!c) {
8421 /*
8422 * A filter that is embedded in a working block graph must
8423 * have a child. Assert this here so this function does
8424 * not return a filter node that is not expected by the
8425 * caller.
8426 */
8427 assert(!bs->drv || !bs->drv->is_filter);
8428 break;
8429 }
8430 bs = c->bs;
8431 }
8432 /*
8433 * Note that this treats nodes with bs->drv == NULL as not being
8434 * filters (bs->drv == NULL should be replaced by something else
8435 * anyway).
8436 * The advantage of this behavior is that this function will thus
8437 * always return a non-NULL value (given a non-NULL @bs).
8438 */
8439
8440 return bs;
8441}
8442
8443/*
8444 * Return the first BDS that has not been added implicitly or that
8445 * does not have a filtered child down the chain starting from @bs
8446 * (including @bs itself).
8447 */
8448BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
8449{
Emanuele Giuseppe Espositob4ad82a2022-03-03 10:15:57 -05008450 GLOBAL_STATE_CODE();
Max Reitzd38d7eb2019-06-12 15:06:37 +02008451 return bdrv_do_skip_filters(bs, true);
8452}
8453
8454/*
8455 * Return the first BDS that does not have a filtered child down the
8456 * chain starting from @bs (including @bs itself).
8457 */
8458BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
8459{
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008460 IO_CODE();
Max Reitzd38d7eb2019-06-12 15:06:37 +02008461 return bdrv_do_skip_filters(bs, false);
8462}
8463
8464/*
8465 * For a backing chain, return the first non-filter backing image of
8466 * the first non-filter image.
8467 */
8468BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
8469{
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008470 IO_CODE();
Max Reitzd38d7eb2019-06-12 15:06:37 +02008471 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
8472}
Hanna Reitz0bc329f2021-08-12 10:41:44 +02008473
8474/**
8475 * Check whether [offset, offset + bytes) overlaps with the cached
8476 * block-status data region.
8477 *
8478 * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
8479 * which is what bdrv_bsc_is_data()'s interface needs.
8480 * Otherwise, *pnum is not touched.
8481 */
8482static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
8483 int64_t offset, int64_t bytes,
8484 int64_t *pnum)
8485{
8486 BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
8487 bool overlaps;
8488
8489 overlaps =
8490 qatomic_read(&bsc->valid) &&
8491 ranges_overlap(offset, bytes, bsc->data_start,
8492 bsc->data_end - bsc->data_start);
8493
8494 if (overlaps && pnum) {
8495 *pnum = bsc->data_end - offset;
8496 }
8497
8498 return overlaps;
8499}
8500
8501/**
8502 * See block_int.h for this function's documentation.
8503 */
8504bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
8505{
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008506 IO_CODE();
Hanna Reitz0bc329f2021-08-12 10:41:44 +02008507 RCU_READ_LOCK_GUARD();
Hanna Reitz0bc329f2021-08-12 10:41:44 +02008508 return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
8509}
8510
8511/**
8512 * See block_int.h for this function's documentation.
8513 */
8514void bdrv_bsc_invalidate_range(BlockDriverState *bs,
8515 int64_t offset, int64_t bytes)
8516{
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008517 IO_CODE();
Hanna Reitz0bc329f2021-08-12 10:41:44 +02008518 RCU_READ_LOCK_GUARD();
8519
8520 if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
8521 qatomic_set(&bs->block_status_cache->valid, false);
8522 }
8523}
8524
8525/**
8526 * See block_int.h for this function's documentation.
8527 */
8528void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
8529{
8530 BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
8531 BdrvBlockStatusCache *old_bsc;
Emanuele Giuseppe Esposito967d7902022-03-03 10:15:58 -05008532 IO_CODE();
Hanna Reitz0bc329f2021-08-12 10:41:44 +02008533
8534 *new_bsc = (BdrvBlockStatusCache) {
8535 .valid = true,
8536 .data_start = offset,
8537 .data_end = offset + bytes,
8538 };
8539
8540 QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
8541
8542 old_bsc = qatomic_rcu_read(&bs->block_status_cache);
8543 qatomic_rcu_set(&bs->block_status_cache, new_bsc);
8544 if (old_bsc) {
8545 g_free_rcu(old_bsc, rcu);
8546 }
8547}