diff options
Diffstat (limited to 'block/block-backend.c')
-rw-r--r-- | block/block-backend.c | 1092 |
1 files changed, 756 insertions, 336 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index 6140d133e2..db6f9b92a3 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -14,6 +14,7 @@ #include "sysemu/block-backend.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "block/coroutines.h" #include "block/throttle-groups.h" #include "hw/qdev-core.h" #include "sysemu/blockdev.h" @@ -32,8 +33,6 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); - typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -45,7 +44,7 @@ struct BlockBackend { char *name; int refcnt; BdrvChild *root; - AioContext *ctx; + AioContext *ctx; /* access with atomic operations only */ DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ @@ -55,9 +54,6 @@ struct BlockBackend { const BlockDevOps *dev_ops; void *dev_opaque; - /* the block size for which the guest device expects atomicity */ - int guest_block_size; - /* If the BDS tree is removed, some of its options are stored here (which * can be used to restore those options in the new BDS on insert) */ BlockBackendRootState root_state; @@ -78,12 +74,14 @@ struct BlockBackend { bool allow_aio_context_change; bool allow_write_beyond_eof; + /* Protected by BQL */ NotifierList remove_bs_notifiers, insert_bs_notifiers; QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; - int quiesce_counter; + int quiesce_counter; /* atomic: written under BQL, read by other threads */ + QemuMutex queued_requests_lock; /* protects queued_requests */ CoQueue queued_requests; - bool disable_request_queuing; + bool disable_request_queuing; /* atomic */ VMChangeStateEntry *vmsh; bool force_allow_inactivate; @@ -103,22 +101,27 @@ typedef struct BlockBackendAIOCB { } BlockBackendAIOCB; static const AIOCBInfo block_backend_aiocb_info = { - .get_aio_context = blk_aiocb_get_aio_context, .aiocb_size = sizeof(BlockBackendAIOCB), }; static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); -/* All BlockBackends */ +/* All BlockBackends. Protected by BQL. */ static QTAILQ_HEAD(, BlockBackend) block_backends = QTAILQ_HEAD_INITIALIZER(block_backends); -/* All BlockBackends referenced by the monitor and which are iterated through by - * blk_next() */ +/* + * All BlockBackends referenced by the monitor and which are iterated through by + * blk_next(). Protected by BQL. + */ static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = QTAILQ_HEAD_INITIALIZER(monitor_block_backends); +static int coroutine_mixed_fn GRAPH_RDLOCK +blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); + static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) @@ -128,15 +131,14 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, } static void blk_root_drained_begin(BdrvChild *child); static bool blk_root_drained_poll(BdrvChild *child); -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); +static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); -static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp); -static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore); +static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); static char *blk_root_get_parent_desc(BdrvChild *child) { @@ -185,10 +187,11 @@ static void blk_vm_state_changed(void *opaque, bool running, RunState state) * * If an error is returned, the VM cannot be allowed to be resumed. */ -static void blk_root_activate(BdrvChild *child, Error **errp) +static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp) { BlockBackend *blk = child->opaque; Error *local_err = NULL; + uint64_t saved_shared_perm; if (!blk->disable_perm) { return; @@ -196,12 +199,22 @@ static void blk_root_activate(BdrvChild *child, Error **errp) blk->disable_perm = false; - blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err); + /* + * blk->shared_perm contains the permissions we want to share once + * migration is really completely done. For now, we need to share + * all; but we also need to retain blk->shared_perm, which is + * overwritten by a successful blk_set_perm() call. Save it and + * restore it below. + */ + saved_shared_perm = blk->shared_perm; + + blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err); if (local_err) { error_propagate(errp, local_err); blk->disable_perm = true; return; } + blk->shared_perm = saved_shared_perm; if (runstate_check(RUN_STATE_INMIGRATE)) { /* Activation can happen when migration process is still active, for @@ -214,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) return; } - blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err); if (local_err) { error_propagate(errp, local_err); blk->disable_perm = true; @@ -224,6 +237,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) void blk_set_force_allow_inactivate(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->force_allow_inactivate = true; } @@ -246,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk) return blk->force_allow_inactivate; } -static int blk_root_inactivate(BdrvChild *child) +static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child) { BlockBackend *blk = child->opaque; @@ -299,6 +313,7 @@ static void blk_root_detach(BdrvChild *child) static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) { BlockBackend *blk = c->opaque; + IO_CODE(); return blk_get_aio_context(blk); } @@ -321,8 +336,7 @@ static const BdrvChildClass child_root = { .attach = blk_root_attach, .detach = blk_root_detach, - .can_set_aio_ctx = blk_root_can_set_aio_ctx, - .set_aio_ctx = blk_root_set_aio_ctx, + .change_aio_ctx = blk_root_change_aio_ctx, .get_parent_aio_context = blk_root_get_parent_aio_context, }; @@ -342,6 +356,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; + GLOBAL_STATE_CODE(); + blk = g_new0(BlockBackend, 1); blk->refcnt = 1; blk->ctx = ctx; @@ -354,6 +370,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) block_acct_init(&blk->stats); + qemu_mutex_init(&blk->queued_requests_lock); qemu_co_queue_init(&blk->queued_requests); notifier_list_init(&blk->remove_bs_notifiers); notifier_list_init(&blk->insert_bs_notifiers); @@ -379,6 +396,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, { BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); + GLOBAL_STATE_CODE(); + if (blk_insert_bs(blk, bs, errp) < 0) { blk_unref(blk); return NULL; @@ -388,7 +407,9 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, /* * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. - * The new BlockBackend is in the main AioContext. + * By default, the new BlockBackend is in the main AioContext, but if the + * parameters connect it with any existing node in a different AioContext, it + * may end up there instead. * * Just as with bdrv_open(), after having called this function the reference to * @options belongs to the block layer (even on failure). @@ -407,6 +428,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, uint64_t perm = 0; uint64_t shared = BLK_PERM_ALL; + GLOBAL_STATE_CODE(); + /* * blk_new_open() is mainly used in .bdrv_create implementations and the * tools where sharing isn't a major concern because the BDS stays private @@ -431,16 +454,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; } - blk = blk_new(qemu_get_aio_context(), perm, shared); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { - blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - perm, shared, blk, errp); + /* bdrv_open() could have moved bs to a different AioContext */ + blk = blk_new(bdrv_get_aio_context(bs), perm, shared); + blk->perm = perm; + blk->shared_perm = shared; + + blk_insert_bs(blk, bs, errp); + bdrv_unref(bs); + if (!blk->root) { blk_unref(blk); return NULL; @@ -467,6 +493,8 @@ static void blk_delete(BlockBackend *blk) assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); assert(QLIST_EMPTY(&blk->aio_notifiers)); + assert(qemu_co_queue_empty(&blk->queued_requests)); + qemu_mutex_destroy(&blk->queued_requests_lock); QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); @@ -484,6 +512,7 @@ static void drive_info_del(DriveInfo *dinfo) int blk_get_refcnt(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? blk->refcnt : 0; } @@ -494,6 +523,7 @@ int blk_get_refcnt(BlockBackend *blk) void blk_ref(BlockBackend *blk) { assert(blk->refcnt > 0); + GLOBAL_STATE_CODE(); blk->refcnt++; } @@ -504,6 +534,7 @@ void blk_ref(BlockBackend *blk) */ void blk_unref(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk) { assert(blk->refcnt > 0); if (blk->refcnt > 1) { @@ -524,6 +555,7 @@ void blk_unref(BlockBackend *blk) */ BlockBackend *blk_all_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&block_backends); } @@ -532,14 +564,12 @@ void blk_remove_all_bs(void) { BlockBackend *blk = NULL; - while ((blk = blk_all_next(blk)) != NULL) { - AioContext *ctx = blk_get_aio_context(blk); + GLOBAL_STATE_CODE(); - aio_context_acquire(ctx); + while ((blk = blk_all_next(blk)) != NULL) { if (blk->root) { blk_remove_bs(blk); } - aio_context_release(ctx); } } @@ -555,6 +585,7 @@ void blk_remove_all_bs(void) */ BlockBackend *blk_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? QTAILQ_NEXT(blk, monitor_link) : QTAILQ_FIRST(&monitor_block_backends); } @@ -568,14 +599,14 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it) /* Must be called from the main loop */ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + old_bs = it->bs; + /* First, return all root nodes of BlockBackends. In order to avoid * returning a BDS twice when multiple BBs refer to it, we only return it * if the BB is the first one in the parent list of the BDS. */ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { BlockBackend *old_blk = it->blk; - old_bs = old_blk ? blk_bs(old_blk) : NULL; - do { it->blk = blk_all_next(it->blk); bs = it->blk ? blk_bs(it->blk) : NULL; @@ -589,11 +620,10 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it) if (bs) { bdrv_ref(bs); bdrv_unref(old_bs); + it->bs = bs; return bs; } it->phase = BDRV_NEXT_MONITOR_OWNED; - } else { - old_bs = it->bs; } /* Then return the monitor-owned BDSes without a BB attached. Ignore all @@ -621,6 +651,7 @@ static void bdrv_next_reset(BdrvNextIterator *it) BlockDriverState *bdrv_first(BdrvNextIterator *it) { + GLOBAL_STATE_CODE(); bdrv_next_reset(it); return bdrv_next(it); } @@ -632,13 +663,10 @@ void bdrv_next_cleanup(BdrvNextIterator *it) /* Must be called from the main loop */ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { - if (it->blk) { - bdrv_unref(blk_bs(it->blk)); - blk_unref(it->blk); - } - } else { - bdrv_unref(it->bs); + bdrv_unref(it->bs); + + if (it->phase == BDRV_NEXT_BACKEND_ROOTS && it->blk) { + blk_unref(it->blk); } bdrv_next_reset(it); @@ -658,6 +686,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) { assert(!blk->name); assert(name && name[0]); + GLOBAL_STATE_CODE(); if (!id_wellformed(name)) { error_setg(errp, "Invalid device name"); @@ -685,6 +714,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) */ void monitor_remove_blk(BlockBackend *blk) { + GLOBAL_STATE_CODE(); + if (!blk->name) { return; } @@ -700,6 +731,7 @@ void monitor_remove_blk(BlockBackend *blk) */ const char *blk_name(const BlockBackend *blk) { + IO_CODE(); return blk->name ?: ""; } @@ -711,6 +743,7 @@ BlockBackend *blk_by_name(const char *name) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); assert(name); while ((blk = blk_next(blk)) != NULL) { if (!strcmp(name, blk->name)) { @@ -725,12 +758,17 @@ BlockBackend *blk_by_name(const char *name) */ BlockDriverState *blk_bs(BlockBackend *blk) { + IO_CODE(); return blk->root ? blk->root->bs : NULL; } -static BlockBackend *bdrv_first_blk(BlockDriverState *bs) +static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs) { BdrvChild *child; + + GLOBAL_STATE_CODE(); + assert_bdrv_graph_readable(); + QLIST_FOREACH(child, &bs->parents, next_parent) { if (child->klass == &child_root) { return child->opaque; @@ -745,6 +783,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs) */ bool bdrv_has_blk(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bdrv_first_blk(bs) != NULL; } @@ -755,6 +794,9 @@ bool bdrv_is_root_node(BlockDriverState *bs) { BdrvChild *c; + GLOBAL_STATE_CODE(); + assert_bdrv_graph_readable(); + QLIST_FOREACH(c, &bs->parents, next_parent) { if (c->klass != &child_root) { return false; @@ -769,6 +811,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) */ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->legacy_dinfo; } @@ -780,6 +823,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) { assert(!blk->legacy_dinfo); + GLOBAL_STATE_CODE(); return blk->legacy_dinfo = dinfo; } @@ -790,6 +834,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); while ((blk = blk_next(blk)) != NULL) { if (blk->legacy_dinfo == dinfo) { @@ -804,6 +849,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) */ BlockBackendPublic *blk_get_public(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->public; } @@ -812,6 +858,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk) */ BlockBackend *blk_by_public(BlockBackendPublic *public) { + GLOBAL_STATE_CODE(); return container_of(public, BlockBackend, public); } @@ -821,16 +868,24 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) void blk_remove_bs(BlockBackend *blk) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - BlockDriverState *bs; BdrvChild *root; + GLOBAL_STATE_CODE(); + notifier_list_notify(&blk->remove_bs_notifiers, blk); if (tgm->throttle_state) { - bs = blk_bs(blk); + BlockDriverState *bs = blk_bs(blk); + + /* + * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for + * example, if a temporary filter node is removed by a blockjob. + */ + bdrv_ref(bs); bdrv_drained_begin(bs); throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); bdrv_drained_end(bs); + bdrv_unref(bs); } blk_update_root_state(blk); @@ -842,7 +897,10 @@ void blk_remove_bs(BlockBackend *blk) blk_drain(blk); root = blk->root; blk->root = NULL; + + bdrv_graph_wrlock(); bdrv_root_unref_child(root); + bdrv_graph_wrunlock(); } /* @@ -851,11 +909,15 @@ void blk_remove_bs(BlockBackend *blk) int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + + GLOBAL_STATE_CODE(); bdrv_ref(bs); + bdrv_graph_wrlock(); blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, blk->perm, blk->shared_perm, blk, errp); + bdrv_graph_wrunlock(); if (blk->root == NULL) { return -EPERM; } @@ -874,16 +936,19 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) */ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_replace_child_bs(blk->root, new_bs, errp); } /* * Sets the permission bitmasks that the user of the BlockBackend needs. */ -int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, - Error **errp) +static int coroutine_mixed_fn GRAPH_RDLOCK +blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) { int ret; + GLOBAL_STATE_CODE(); if (blk->root && !blk->disable_perm) { ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); @@ -898,8 +963,18 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, return 0; } +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + return blk_set_perm_locked(blk, perm, shared_perm, errp); +} + void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) { + GLOBAL_STATE_CODE(); *perm = blk->perm; *shared_perm = blk->shared_perm; } @@ -910,6 +985,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) */ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) { + GLOBAL_STATE_CODE(); if (blk->dev) { return -EBUSY; } @@ -935,10 +1011,10 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) void blk_detach_dev(BlockBackend *blk, DeviceState *dev) { assert(blk->dev == dev); + GLOBAL_STATE_CODE(); blk->dev = NULL; blk->dev_ops = NULL; blk->dev_opaque = NULL; - blk->guest_block_size = 512; blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -948,6 +1024,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev) */ DeviceState *blk_get_attached_dev(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->dev; } @@ -956,6 +1033,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk) char *blk_get_attached_dev_id(BlockBackend *blk) { DeviceState *dev = blk->dev; + IO_CODE(); if (!dev) { return g_strdup(""); @@ -976,6 +1054,8 @@ BlockBackend *blk_by_dev(void *dev) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + assert(dev != NULL); while ((blk = blk_all_next(blk)) != NULL) { if (blk->dev == dev) { @@ -993,11 +1073,12 @@ BlockBackend *blk_by_dev(void *dev) void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque) { + GLOBAL_STATE_CODE(); blk->dev_ops = ops; blk->dev_opaque = opaque; /* Are we currently quiesced? Should we enforce this right now? */ - if (blk->quiesce_counter && ops->drained_begin) { + if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) { ops->drained_begin(opaque); } } @@ -1014,6 +1095,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, */ void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; Error *local_err = NULL; @@ -1046,6 +1128,7 @@ static void blk_root_change_media(BdrvChild *child, bool load) */ bool blk_dev_has_removable_media(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); } @@ -1054,6 +1137,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk) */ bool blk_dev_has_tray(BlockBackend *blk) { + IO_CODE(); return blk->dev_ops && blk->dev_ops->is_tray_open; } @@ -1063,6 +1147,7 @@ bool blk_dev_has_tray(BlockBackend *blk) */ void blk_dev_eject_request(BlockBackend *blk, bool force) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->eject_request_cb) { blk->dev_ops->eject_request_cb(blk->dev_opaque, force); } @@ -1073,6 +1158,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force) */ bool blk_dev_is_tray_open(BlockBackend *blk) { + IO_CODE(); if (blk_dev_has_tray(blk)) { return blk->dev_ops->is_tray_open(blk->dev_opaque); } @@ -1085,6 +1171,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk) */ bool blk_dev_is_medium_locked(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->is_medium_locked) { return blk->dev_ops->is_medium_locked(blk->dev_opaque); } @@ -1105,6 +1192,7 @@ static void blk_root_resize(BdrvChild *child) void blk_iostatus_enable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = true; blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1113,6 +1201,7 @@ void blk_iostatus_enable(BlockBackend *blk) * enables it _and_ the VM is configured to stop on errors */ bool blk_iostatus_is_enabled(const BlockBackend *blk) { + IO_CODE(); return (blk->iostatus_enabled && (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || @@ -1121,16 +1210,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk) BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->iostatus; } void blk_iostatus_disable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = false; } void blk_iostatus_reset(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk_iostatus_is_enabled(blk)) { blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1138,6 +1230,7 @@ void blk_iostatus_reset(BlockBackend *blk) void blk_iostatus_set_err(BlockBackend *blk, int error) { + IO_CODE(); assert(blk_iostatus_is_enabled(blk)); if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : @@ -1147,29 +1240,32 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_write_beyond_eof = allow; } void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_aio_context_change = allow; } void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) { - blk->disable_request_queuing = disable; + IO_CODE(); + qatomic_set(&blk->disable_request_queuing, disable); } -static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - size_t size) +static int coroutine_fn GRAPH_RDLOCK +blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes) { int64_t len; - if (size > INT_MAX) { + if (bytes < 0) { return -EIO; } - if (!blk_is_available(blk)) { + if (!blk_co_is_available(blk)) { return -ENOMEDIUM; } @@ -1178,12 +1274,12 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, } if (!blk->allow_write_beyond_eof) { - len = blk_getlength(blk); + len = bdrv_co_getlength(blk_bs(blk)); if (len < 0) { return len; } - if (offset > len || len - offset < size) { + if (offset > len || len - offset < bytes) { return -EIO; } } @@ -1191,27 +1287,45 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } +/* Are we currently in a drained section? */ +bool blk_in_drain(BlockBackend *blk) +{ + GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */ + return qatomic_read(&blk->quiesce_counter); +} + /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) { assert(blk->in_flight > 0); - if (blk->quiesce_counter && !blk->disable_request_queuing) { + if (qatomic_read(&blk->quiesce_counter) && + !qatomic_read(&blk->disable_request_queuing)) { + /* + * Take lock before decrementing in flight counter so main loop thread + * waits for us to enqueue ourselves before it can leave the drained + * section. + */ + qemu_mutex_lock(&blk->queued_requests_lock); blk_dec_in_flight(blk); - qemu_co_queue_wait(&blk->queued_requests, NULL); + qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock); blk_inc_in_flight(blk); + qemu_mutex_unlock(&blk->queued_requests_lock); } } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static int coroutine_fn -blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags) +blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; + IO_CODE(); blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); /* Call blk_bs() only after waiting, the graph may have changed */ bs = blk_bs(blk); @@ -1227,22 +1341,49 @@ blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, /* throttling disk I/O */ if (blk->public.throttle_group_member.throttle_state) { throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, - bytes, false); + bytes, THROTTLE_READ); } - ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset, + flags); bdrv_dec_in_flight(bs); return ret; } +int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, + void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + + assert(bytes <= SIZE_MAX); + + return blk_co_preadv(blk, offset, bytes, &qiov, flags); +} + int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); + + blk_inc_in_flight(blk); + ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags); + blk_dec_in_flight(blk); + + return ret; +} + +int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, BdrvRequestFlags flags) +{ + int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); - ret = blk_do_preadv(blk, offset, bytes, qiov, flags); + ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); return ret; @@ -1250,14 +1391,16 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static int coroutine_fn -blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags) +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; + IO_CODE(); blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); /* Call blk_bs() only after waiting, the graph may have changed */ bs = blk_bs(blk); @@ -1272,7 +1415,7 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, /* throttling disk I/O */ if (blk->public.throttle_group_member.throttle_state) { throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, - bytes, true); + bytes, THROTTLE_WRITE); } if (!blk->enable_write_cache) { @@ -1286,100 +1429,85 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, } int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); - ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); return ret; } -int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, + const void *buf, BdrvRequestFlags flags) { - return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); -} - -typedef struct BlkRwCo { - BlockBackend *blk; - int64_t offset; - void *iobuf; - int ret; - BdrvRequestFlags flags; -} BlkRwCo; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); -static void blk_read_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + assert(bytes <= SIZE_MAX); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); } -static void blk_write_entry(void *opaque) +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, - qiov, 0, rwco->flags); - aio_wait_kick(); + IO_OR_GS_CODE(); + return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } -static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - int64_t bytes, CoroutineEntry co_entry, - BdrvRequestFlags flags) +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - BlkRwCo rwco = { - .blk = blk, - .offset = offset, - .iobuf = &qiov, - .flags = flags, - .ret = NOT_DONE, - }; - - blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); - } else { - Coroutine *co = qemu_coroutine_create(co_entry, &rwco); - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } - blk_dec_in_flight(blk); - - return rwco.ret; + IO_CODE(); + GRAPH_RDLOCK_GUARD(); + return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum, + map, file); } -int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum) { - return blk_prw(blk, offset, NULL, bytes, blk_write_entry, - flags | BDRV_REQ_ZERO_WRITE); + IO_CODE(); + GRAPH_RDLOCK_GUARD(); + return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset, + bytes, pnum); } +typedef struct BlkRwCo { + BlockBackend *blk; + int64_t offset; + void *iobuf; + int ret; + BdrvRequestFlags flags; +} BlkRwCo; + int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) { + GLOBAL_STATE_CODE(); return bdrv_make_zero(blk->root, flags); } void blk_inc_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_inc(&blk->in_flight); } void blk_dec_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_dec(&blk->in_flight); aio_wait_kick(); } @@ -1398,13 +1526,14 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, void *opaque, int ret) { struct BlockBackendAIOCB *acb; + IO_CODE(); blk_inc_in_flight(blk); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); acb->blk = blk; acb->ret = ret; - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), error_callback_bh, acb); return &acb->common; } @@ -1412,20 +1541,12 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, typedef struct BlkAioEmAIOCB { BlockAIOCB common; BlkRwCo rwco; - int bytes; + int64_t bytes; bool has_returned; } BlkAioEmAIOCB; -static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_) -{ - BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common); - - return blk_get_aio_context(acb->rwco.blk); -} - static const AIOCBInfo blk_aio_em_aiocb_info = { .aiocb_size = sizeof(BlkAioEmAIOCB), - .get_aio_context = blk_aio_em_aiocb_get_aio_context, }; static void blk_aio_complete(BlkAioEmAIOCB *acb) @@ -1444,7 +1565,8 @@ static void blk_aio_complete_bh(void *opaque) blk_aio_complete(acb); } -static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, +static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, + int64_t bytes, void *iobuf, CoroutineEntry co_entry, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) @@ -1465,100 +1587,117 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->has_returned = false; co = qemu_coroutine_create(co_entry, acb); - bdrv_coroutine_enter(blk_bs(blk), co); + aio_co_enter(qemu_get_current_aio_context(), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), blk_aio_complete_bh, acb); } return &acb->common; } -static void blk_aio_read_entry(void *opaque) +static void coroutine_fn blk_aio_read_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); + rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov, + 0, rwco->flags); blk_aio_complete(acb); } -static void blk_aio_write_entry(void *opaque) +static void coroutine_fn blk_aio_write_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); + rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, + int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { - return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, + IO_CODE(); + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) +int64_t coroutine_fn blk_co_getlength(BlockBackend *blk) { - int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); - if (ret < 0) { - return ret; - } - return count; -} + IO_CODE(); + GRAPH_RDLOCK_GUARD(); -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, - BdrvRequestFlags flags) -{ - int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - flags); - if (ret < 0) { - return ret; + if (!blk_co_is_available(blk)) { + return -ENOMEDIUM; } - return count; + + return bdrv_co_getlength(blk_bs(blk)); } -int64_t blk_getlength(BlockBackend *blk) +int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk) { - if (!blk_is_available(blk)) { + BlockDriverState *bs = blk_bs(blk); + + IO_CODE(); + GRAPH_RDLOCK_GUARD(); + + if (!bs) { return -ENOMEDIUM; + } else { + return bdrv_co_nb_sectors(bs); } - - return bdrv_getlength(blk_bs(blk)); } -void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) +/* + * This wrapper is written by hand because this function is in the hot I/O path, + * via blk_get_geometry. + */ +int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk) { - if (!blk_bs(blk)) { - *nb_sectors_ptr = 0; + BlockDriverState *bs = blk_bs(blk); + + IO_CODE(); + + if (!bs) { + return -ENOMEDIUM; } else { - bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr); + return bdrv_nb_sectors(bs); } } -int64_t blk_nb_sectors(BlockBackend *blk) +/* return 0 as number of sectors if no device present or error */ +void coroutine_fn blk_co_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr) { - if (!blk_is_available(blk)) { - return -ENOMEDIUM; - } + int64_t ret = blk_co_nb_sectors(blk); + *nb_sectors_ptr = ret < 0 ? 0 : ret; +} - return bdrv_nb_sectors(blk_bs(blk)); +/* + * This wrapper is written by hand because this function is in the hot I/O path. + */ +void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk, + uint64_t *nb_sectors_ptr) +{ + int64_t ret = blk_nb_sectors(blk); + *nb_sectors_ptr = ret < 0 ? 0 : ret; } BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry, flags, cb, opaque); } @@ -1567,53 +1706,59 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry, flags, cb, opaque); } void blk_aio_cancel(BlockAIOCB *acb) { + GLOBAL_STATE_CODE(); bdrv_aio_cancel(acb); } void blk_aio_cancel_async(BlockAIOCB *acb) { + IO_CODE(); bdrv_aio_cancel_async(acb); } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static int coroutine_fn -blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { + IO_CODE(); + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); - if (!blk_is_available(blk)) { + if (!blk_co_is_available(blk)) { return -ENOMEDIUM; } return bdrv_co_ioctl(blk_bs(blk), req, buf); } -static void blk_ioctl_entry(void *opaque) +int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, + void *buf) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + int ret; + IO_OR_GS_CODE(); - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); -} + blk_inc_in_flight(blk); + ret = blk_co_do_ioctl(blk, req, buf); + blk_dec_in_flight(blk); -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -{ - return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); + return ret; } -static void blk_aio_ioctl_entry(void *opaque) +static void coroutine_fn blk_aio_ioctl_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); blk_aio_complete(acb); } @@ -1621,16 +1766,19 @@ static void blk_aio_ioctl_entry(void *opaque) BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ static int coroutine_fn -blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_CODE(); blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { @@ -1640,112 +1788,294 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) return bdrv_co_pdiscard(blk->root, offset, bytes); } -static void blk_aio_pdiscard_entry(void *opaque) +static void coroutine_fn blk_aio_pdiscard_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } -int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); - ret = blk_do_pdiscard(blk, offset, bytes); + ret = blk_co_do_pdiscard(blk, offset, bytes); blk_dec_in_flight(blk); return ret; } -static void blk_pdiscard_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); -} - -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -{ - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); -} - /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn blk_do_flush(BlockBackend *blk) +static int coroutine_fn blk_co_do_flush(BlockBackend *blk) { + IO_CODE(); blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); - if (!blk_is_available(blk)) { + if (!blk_co_is_available(blk)) { return -ENOMEDIUM; } return bdrv_co_flush(blk_bs(blk)); } -static void blk_aio_flush_entry(void *opaque) +static void coroutine_fn blk_aio_flush_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_flush(rwco->blk); + rwco->ret = blk_co_do_flush(rwco->blk); blk_aio_complete(acb); } BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); } int coroutine_fn blk_co_flush(BlockBackend *blk) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); - ret = blk_do_flush(blk); + ret = blk_co_do_flush(blk); blk_dec_in_flight(blk); return ret; } -static void blk_flush_entry(void *opaque) +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) { - BlkRwCo *rwco = opaque; - rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset, + (unsigned int*)(uintptr_t)acb->bytes, + rwco->iobuf); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones, + BlockCompletionFunc *cb, void *opaque) +{ + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .iobuf = zones, + .ret = NOT_DONE, + }; + acb->bytes = (int64_t)(uintptr_t)nr_zones, + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_report_entry, acb); + aio_co_enter(qemu_get_current_aio_context(), co); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), + blk_aio_complete_bh, acb); + } + + return &acb->common; } -int blk_flush(BlockBackend *blk) +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) { - return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0); + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_mgmt(rwco->blk, + (BlockZoneOp)(uintptr_t)rwco->iobuf, + rwco->offset, acb->bytes); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len, + BlockCompletionFunc *cb, void *opaque) { + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .iobuf = (void *)(uintptr_t)op, + .ret = NOT_DONE, + }; + acb->bytes = len; + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb); + aio_co_enter(qemu_get_current_aio_context(), co); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes, + rwco->iobuf, rwco->flags); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) { + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .ret = NOT_DONE, + .flags = flags, + .iobuf = qiov, + }; + acb->bytes = (int64_t)(uintptr_t)offset; + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); + aio_co_enter(qemu_get_current_aio_context(), co); + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +/* + * Send a zone_report command. + * offset is a byte offset from the start of the device. No alignment + * required for offset. + * nr_zones represents IN maximum and OUT actual. + */ +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); /* increase before waiting */ + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + if (!blk_is_available(blk)) { + blk_dec_in_flight(blk); + return -ENOMEDIUM; + } + ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones); + blk_dec_in_flight(blk); + return ret; +} + +/* + * Send a zone_management command. + * op is the zone operation; + * offset is the byte offset from the start of the zoned device; + * len is the maximum number of bytes the command should operate on. It + * should be aligned with the device zone size. + */ +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + + ret = blk_check_byte_request(blk, offset, len); + if (ret < 0) { + blk_dec_in_flight(blk); + return ret; + } + + ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len); + blk_dec_in_flight(blk); + return ret; +} + +/* + * Send a zone_append command. + */ +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + if (!blk_is_available(blk)) { + blk_dec_in_flight(blk); + return -ENOMEDIUM; + } + + ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags); + blk_dec_in_flight(blk); + return ret; } void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { + bdrv_ref(bs); bdrv_drained_begin(bs); } /* We may have -ENOMEDIUM completions in flight */ AIO_WAIT_WHILE(blk_get_aio_context(blk), - qatomic_mb_read(&blk->in_flight) > 0); + qatomic_read(&blk->in_flight) > 0); if (bs) { bdrv_drained_end(bs); + bdrv_unref(bs); } } @@ -1753,17 +2083,13 @@ void blk_drain_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + bdrv_drain_all_begin(); while ((blk = blk_all_next(blk)) != NULL) { - AioContext *ctx = blk_get_aio_context(blk); - - aio_context_acquire(ctx); - /* We may have -ENOMEDIUM completions in flight */ - AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0); - - aio_context_release(ctx); + AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0); } bdrv_drain_all_end(); @@ -1772,12 +2098,14 @@ void blk_drain_all(void) void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { + GLOBAL_STATE_CODE(); blk->on_read_error = on_read_error; blk->on_write_error = on_write_error; } BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) { + IO_CODE(); return is_read ? blk->on_read_error : blk->on_write_error; } @@ -1785,6 +2113,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, int error) { BlockdevOnError on_err = blk_get_on_error(blk, is_read); + IO_CODE(); switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: @@ -1810,7 +2139,7 @@ static void send_qmp_error_event(BlockBackend *blk, BlockDriverState *bs = blk_bs(blk); optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; - qapi_event_send_block_io_error(blk_name(blk), !!bs, + qapi_event_send_block_io_error(blk_name(blk), bs ? bdrv_get_node_name(bs) : NULL, optype, action, blk_iostatus_is_enabled(blk), error == ENOSPC, strerror(error)); @@ -1824,6 +2153,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); + IO_CODE(); if (action == BLOCK_ERROR_ACTION_STOP) { /* First set the iostatus, so that "info block" returns an iostatus @@ -1855,6 +2185,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool blk_supports_write_perm(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return !bdrv_is_read_only(bs); @@ -1869,12 +2200,14 @@ bool blk_supports_write_perm(BlockBackend *blk) */ bool blk_is_writable(BlockBackend *blk) { + IO_CODE(); return blk->perm & BLK_PERM_WRITE; } bool blk_is_sg(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { return false; @@ -1885,54 +2218,73 @@ bool blk_is_sg(BlockBackend *blk) bool blk_enable_write_cache(BlockBackend *blk) { + IO_CODE(); return blk->enable_write_cache; } void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { + IO_CODE(); blk->enable_write_cache = wce; } -void blk_invalidate_cache(BlockBackend *blk, Error **errp) +void blk_activate(BlockBackend *blk, Error **errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { error_setg(errp, "Device '%s' has no medium", blk->name); return; } - bdrv_invalidate_cache(bs, errp); + /* + * Migration code can call this function in coroutine context, so leave + * coroutine context if necessary. + */ + if (qemu_in_coroutine()) { + bdrv_co_activate(bs, errp); + } else { + GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_activate(bs, errp); + } } -bool blk_is_inserted(BlockBackend *blk) +bool coroutine_fn blk_co_is_inserted(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); + assert_bdrv_graph_readable(); - return bs && bdrv_is_inserted(bs); + return bs && bdrv_co_is_inserted(bs); } -bool blk_is_available(BlockBackend *blk) +bool coroutine_fn blk_co_is_available(BlockBackend *blk) { - return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk); + IO_CODE(); + return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk); } -void blk_lock_medium(BlockBackend *blk, bool locked) +void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); + GRAPH_RDLOCK_GUARD(); if (bs) { - bdrv_lock_medium(bs, locked); + bdrv_co_lock_medium(bs, locked); } } -void blk_eject(BlockBackend *blk, bool eject_flag) +void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag) { BlockDriverState *bs = blk_bs(blk); char *id; + IO_CODE(); + GRAPH_RDLOCK_GUARD(); if (bs) { - bdrv_eject(bs, eject_flag); + bdrv_co_eject(bs, eject_flag); } /* Whether or not we ejected on the backend, @@ -1946,6 +2298,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag) int blk_get_flags(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return bdrv_get_flags(bs); @@ -1958,6 +2311,7 @@ int blk_get_flags(BlockBackend *blk) uint32_t blk_get_request_alignment(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE; } @@ -1966,6 +2320,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint64_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); @@ -1979,6 +2334,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint32_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_transfer); @@ -1986,29 +2342,36 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) return ROUND_DOWN(max, blk_get_request_alignment(blk)); } -int blk_get_max_iov(BlockBackend *blk) +int blk_get_max_hw_iov(BlockBackend *blk) { - return blk->root->bs->bl.max_iov; + IO_CODE(); + return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov, + blk->root->bs->bl.max_iov); } -void blk_set_guest_block_size(BlockBackend *blk, int align) +int blk_get_max_iov(BlockBackend *blk) { - blk->guest_block_size = align; + IO_CODE(); + return blk->root->bs->bl.max_iov; } void *blk_try_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size); } void *blk_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_blockalign(blk ? blk_bs(blk) : NULL, size); } bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!bs) { return false; @@ -2020,6 +2383,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock(bs, op, reason); @@ -2029,6 +2393,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) void blk_op_block_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_block_all(bs, reason); @@ -2038,96 +2403,119 @@ void blk_op_block_all(BlockBackend *blk, Error *reason) void blk_op_unblock_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock_all(bs, reason); } } +/** + * Return BB's current AioContext. Note that this context may change + * concurrently at any time, with one exception: If the BB has a root node + * attached, its context will only change through bdrv_try_change_aio_context(), + * which creates a drained section. Therefore, incrementing such a BB's + * in-flight counter will prevent its context from changing. + */ AioContext *blk_get_aio_context(BlockBackend *blk) { - BlockDriverState *bs = blk_bs(blk); + IO_CODE(); - if (bs) { - AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); - assert(ctx == blk->ctx); + if (!blk) { + return qemu_get_aio_context(); } - return blk->ctx; -} - -static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) -{ - BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb); - return blk_get_aio_context(blk_acb->blk); + return qatomic_read(&blk->ctx); } -static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, - bool update_root_node, Error **errp) +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp) { + bool old_allow_change; BlockDriverState *bs = blk_bs(blk); - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; int ret; - if (bs) { - if (update_root_node) { - ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, - errp); - if (ret < 0) { - return ret; - } - } - if (tgm->throttle_state) { - bdrv_drained_begin(bs); - throttle_group_detach_aio_context(tgm); - throttle_group_attach_aio_context(tgm, new_context); - bdrv_drained_end(bs); - } + GLOBAL_STATE_CODE(); + + if (!bs) { + qatomic_set(&blk->ctx, new_context); + return 0; } - blk->ctx = new_context; - return 0; + bdrv_ref(bs); + + old_allow_change = blk->allow_aio_context_change; + blk->allow_aio_context_change = true; + + ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp); + + blk->allow_aio_context_change = old_allow_change; + + bdrv_unref(bs); + return ret; } -int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, - Error **errp) +typedef struct BdrvStateBlkRootContext { + AioContext *new_ctx; + BlockBackend *blk; +} BdrvStateBlkRootContext; + +static void blk_root_set_aio_ctx_commit(void *opaque) { - return blk_do_set_aio_context(blk, new_context, true, errp); + BdrvStateBlkRootContext *s = opaque; + BlockBackend *blk = s->blk; + AioContext *new_context = s->new_ctx; + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + + qatomic_set(&blk->ctx, new_context); + if (tgm->throttle_state) { + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, new_context); + } } -static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp) +static TransactionActionDrv set_blk_root_context = { + .commit = blk_root_set_aio_ctx_commit, + .clean = g_free, +}; + +static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockBackend *blk = child->opaque; + BdrvStateBlkRootContext *s; - if (blk->allow_aio_context_change) { - return true; + if (!blk->allow_aio_context_change) { + /* + * Manually created BlockBackends (those with a name) that are not + * attached to anything can change their AioContext without updating + * their user; return an error for others. + */ + if (!blk->name || blk->dev) { + /* TODO Add BB name/QOM path */ + error_setg(errp, "Cannot change iothread of active block backend"); + return false; + } } - /* Only manually created BlockBackends that are not attached to anything - * can change their AioContext without updating their user. */ - if (!blk->name || blk->dev) { - /* TODO Add BB name/QOM path */ - error_setg(errp, "Cannot change iothread of active block backend"); - return false; - } + s = g_new(BdrvStateBlkRootContext, 1); + *s = (BdrvStateBlkRootContext) { + .new_ctx = ctx, + .blk = blk, + }; + tran_add(tran, &set_blk_root_context, s); return true; } -static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, - GSList **ignore) -{ - BlockBackend *blk = child->opaque; - blk_do_set_aio_context(blk, ctx, false, &error_abort); -} - void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) { BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); notifier = g_new(BlockBackendAioNotifier, 1); notifier->attached_aio_context = attached_aio_context; @@ -2150,6 +2538,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); + if (bs) { bdrv_remove_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); @@ -2170,72 +2560,65 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->remove_bs_notifiers, notify); } void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->insert_bs_notifiers, notify); } -void blk_io_plug(BlockBackend *blk) -{ - BlockDriverState *bs = blk_bs(blk); - - if (bs) { - bdrv_io_plug(bs); - } -} - -void blk_io_unplug(BlockBackend *blk) -{ - BlockDriverState *bs = blk_bs(blk); - - if (bs) { - bdrv_io_unplug(bs); - } -} - BlockAcctStats *blk_get_stats(BlockBackend *blk) { + IO_CODE(); return &blk->stats; } void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } -int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int count) +int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf) { - return blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - BDRV_REQ_WRITE_COMPRESSED); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0, + BDRV_REQ_WRITE_COMPRESSED); } -int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) +int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) { - if (!blk_is_available(blk)) { + IO_OR_GS_CODE(); + GRAPH_RDLOCK_GUARD(); + if (!blk_co_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; } - return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); + return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { int ret; + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; @@ -2255,6 +2638,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2264,6 +2648,9 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) { + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2273,6 +2660,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2286,6 +2674,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) */ void blk_update_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; @@ -2298,6 +2687,7 @@ void blk_update_root_state(BlockBackend *blk) */ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.detect_zeroes; } @@ -2307,33 +2697,33 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->root_state; } int blk_commit_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); while ((blk = blk_all_next(blk)) != NULL) { - AioContext *aio_context = blk_get_aio_context(blk); BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); - aio_context_acquire(aio_context); if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { int ret; ret = bdrv_commit(unfiltered_bs); if (ret < 0) { - aio_context_release(aio_context); return ret; } } - aio_context_release(aio_context); } return 0; } @@ -2342,6 +2732,7 @@ int blk_commit_all(void) /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) { + GLOBAL_STATE_CODE(); throttle_group_config(&blk->public.throttle_group_member, cfg); } @@ -2350,12 +2741,15 @@ void blk_io_limits_disable(BlockBackend *blk) BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; assert(tgm->throttle_state); + GLOBAL_STATE_CODE(); if (bs) { + bdrv_ref(bs); bdrv_drained_begin(bs); } throttle_group_unregister_tgm(tgm); if (bs) { bdrv_drained_end(bs); + bdrv_unref(bs); } } @@ -2363,12 +2757,14 @@ void blk_io_limits_disable(BlockBackend *blk) void blk_io_limits_enable(BlockBackend *blk, const char *group) { assert(!blk->public.throttle_group_member.throttle_state); + GLOBAL_STATE_CODE(); throttle_group_register_tgm(&blk->public.throttle_group_member, group, blk_get_aio_context(blk)); } void blk_io_limits_update_group(BlockBackend *blk, const char *group) { + GLOBAL_STATE_CODE(); /* this BB is not part of any group */ if (!blk->public.throttle_group_member.throttle_state) { return; @@ -2390,7 +2786,7 @@ static void blk_root_drained_begin(BdrvChild *child) BlockBackend *blk = child->opaque; ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - if (++blk->quiesce_counter == 1) { + if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) { if (blk->dev_ops && blk->dev_ops->drained_begin) { blk->dev_ops->drained_begin(blk->dev_opaque); } @@ -2408,7 +2804,7 @@ static bool blk_root_drained_poll(BdrvChild *child) { BlockBackend *blk = child->opaque; bool busy = false; - assert(blk->quiesce_counter); + assert(qatomic_read(&blk->quiesce_counter)); if (blk->dev_ops && blk->dev_ops->drained_poll) { busy = blk->dev_ops->drained_poll(blk->dev_opaque); @@ -2416,40 +2812,59 @@ static bool blk_root_drained_poll(BdrvChild *child) return busy || !!blk->in_flight; } -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) +static void blk_root_drained_end(BdrvChild *child) { BlockBackend *blk = child->opaque; - assert(blk->quiesce_counter); + assert(qatomic_read(&blk->quiesce_counter)); assert(blk->public.throttle_group_member.io_limits_disabled); qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled); - if (--blk->quiesce_counter == 0) { + if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) { if (blk->dev_ops && blk->dev_ops->drained_end) { blk->dev_ops->drained_end(blk->dev_opaque); } - while (qemu_co_enter_next(&blk->queued_requests, NULL)) { + qemu_mutex_lock(&blk->queued_requests_lock); + while (qemu_co_enter_next(&blk->queued_requests, + &blk->queued_requests_lock)) { /* Resume all queued requests */ } + qemu_mutex_unlock(&blk->queued_requests_lock); } } -void blk_register_buf(BlockBackend *blk, void *host, size_t size) +bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp) { - bdrv_register_buf(blk_bs(blk), host, size); + BlockDriverState *bs = blk_bs(blk); + + GLOBAL_STATE_CODE(); + + if (bs) { + return bdrv_register_buf(bs, host, size, errp); + } + return true; } -void blk_unregister_buf(BlockBackend *blk, void *host) +void blk_unregister_buf(BlockBackend *blk, void *host, size_t size) { - bdrv_unregister_buf(blk_bs(blk), host); + BlockDriverState *bs = blk_bs(blk); + + GLOBAL_STATE_CODE(); + + if (bs) { + bdrv_unregister_buf(bs, host, size); + } } int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags read_flags, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { int r; + IO_CODE(); + GRAPH_RDLOCK_GUARD(); + r = blk_check_byte_request(blk_in, off_in, bytes); if (r) { return r; @@ -2458,6 +2873,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, if (r) { return r; } + return bdrv_co_copy_range(blk_in->root, off_in, blk_out->root, off_out, bytes, read_flags, write_flags); @@ -2465,11 +2881,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, const BdrvChild *blk_root(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root; } int blk_make_empty(BlockBackend *blk, Error **errp) { + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; |