Diffstat (limited to 'block/block-backend.c')
-rw-r--r--  block/block-backend.c  1092
1 file changed, 756 insertions, 336 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index 6140d133e2..db6f9b92a3 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -14,6 +14,7 @@
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "block/coroutines.h"
#include "block/throttle-groups.h"
#include "hw/qdev-core.h"
#include "sysemu/blockdev.h"
@@ -32,8 +33,6 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
typedef struct BlockBackendAioNotifier {
void (*attached_aio_context)(AioContext *new_context, void *opaque);
void (*detach_aio_context)(void *opaque);
@@ -45,7 +44,7 @@ struct BlockBackend {
char *name;
int refcnt;
BdrvChild *root;
- AioContext *ctx;
+ AioContext *ctx; /* access with atomic operations only */
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -55,9 +54,6 @@ struct BlockBackend {
const BlockDevOps *dev_ops;
void *dev_opaque;
- /* the block size for which the guest device expects atomicity */
- int guest_block_size;
-
/* If the BDS tree is removed, some of its options are stored here (which
* can be used to restore those options in the new BDS on insert) */
BlockBackendRootState root_state;
@@ -78,12 +74,14 @@ struct BlockBackend {
bool allow_aio_context_change;
bool allow_write_beyond_eof;
+ /* Protected by BQL */
NotifierList remove_bs_notifiers, insert_bs_notifiers;
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
- int quiesce_counter;
+ int quiesce_counter; /* atomic: written under BQL, read by other threads */
+ QemuMutex queued_requests_lock; /* protects queued_requests */
CoQueue queued_requests;
- bool disable_request_queuing;
+ bool disable_request_queuing; /* atomic */
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -103,22 +101,27 @@ typedef struct BlockBackendAIOCB {
} BlockBackendAIOCB;
static const AIOCBInfo block_backend_aiocb_info = {
- .get_aio_context = blk_aiocb_get_aio_context,
.aiocb_size = sizeof(BlockBackendAIOCB),
};
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-/* All BlockBackends */
+/* All BlockBackends. Protected by BQL. */
static QTAILQ_HEAD(, BlockBackend) block_backends =
QTAILQ_HEAD_INITIALIZER(block_backends);
-/* All BlockBackends referenced by the monitor and which are iterated through by
- * blk_next() */
+/*
+ * All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next(). Protected by BQL.
+ */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+
static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
@@ -128,15 +131,14 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
+static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
-static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp);
-static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
- GSList **ignore);
+static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
@@ -185,10 +187,11 @@ static void blk_vm_state_changed(void *opaque, bool running, RunState state)
*
* If an error is returned, the VM cannot be allowed to be resumed.
*/
-static void blk_root_activate(BdrvChild *child, Error **errp)
+static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp)
{
BlockBackend *blk = child->opaque;
Error *local_err = NULL;
+ uint64_t saved_shared_perm;
if (!blk->disable_perm) {
return;
@@ -196,12 +199,22 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
blk->disable_perm = false;
- blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
+ /*
+ * blk->shared_perm contains the permissions we want to share once
+ * migration is really completely done. For now, we need to share
+ * all; but we also need to retain blk->shared_perm, which is
+ * overwritten by a successful blk_set_perm() call. Save it and
+ * restore it below.
+ */
+ saved_shared_perm = blk->shared_perm;
+
+ blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk->disable_perm = true;
return;
}
+ blk->shared_perm = saved_shared_perm;
if (runstate_check(RUN_STATE_INMIGRATE)) {
/* Activation can happen when migration process is still active, for
@@ -214,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
return;
}
- blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+ blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk->disable_perm = true;
@@ -224,6 +237,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
void blk_set_force_allow_inactivate(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->force_allow_inactivate = true;
}
@@ -246,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk)
return blk->force_allow_inactivate;
}
-static int blk_root_inactivate(BdrvChild *child)
+static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
@@ -299,6 +313,7 @@ static void blk_root_detach(BdrvChild *child)
static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
{
BlockBackend *blk = c->opaque;
+ IO_CODE();
return blk_get_aio_context(blk);
}
@@ -321,8 +336,7 @@ static const BdrvChildClass child_root = {
.attach = blk_root_attach,
.detach = blk_root_detach,
- .can_set_aio_ctx = blk_root_can_set_aio_ctx,
- .set_aio_ctx = blk_root_set_aio_ctx,
+ .change_aio_ctx = blk_root_change_aio_ctx,
.get_parent_aio_context = blk_root_get_parent_aio_context,
};
@@ -342,6 +356,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
+ GLOBAL_STATE_CODE();
+
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->ctx = ctx;
@@ -354,6 +370,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
block_acct_init(&blk->stats);
+ qemu_mutex_init(&blk->queued_requests_lock);
qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
@@ -379,6 +396,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
{
BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
+ GLOBAL_STATE_CODE();
+
if (blk_insert_bs(blk, bs, errp) < 0) {
blk_unref(blk);
return NULL;
@@ -388,7 +407,9 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
/*
* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
- * The new BlockBackend is in the main AioContext.
+ * By default, the new BlockBackend is in the main AioContext, but if the
+ * parameters connect it with any existing node in a different AioContext, it
+ * may end up there instead.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
@@ -407,6 +428,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
+ GLOBAL_STATE_CODE();
+
/*
* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a major concern because the BDS stays private
@@ -431,16 +454,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
}
- blk = blk_new(qemu_get_aio_context(), perm, shared);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
- blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root,
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
- perm, shared, blk, errp);
+ /* bdrv_open() could have moved bs to a different AioContext */
+ blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
+ blk->perm = perm;
+ blk->shared_perm = shared;
+
+ blk_insert_bs(blk, bs, errp);
+ bdrv_unref(bs);
+
if (!blk->root) {
blk_unref(blk);
return NULL;
@@ -467,6 +493,8 @@ static void blk_delete(BlockBackend *blk)
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->aio_notifiers));
+ assert(qemu_co_queue_empty(&blk->queued_requests));
+ qemu_mutex_destroy(&blk->queued_requests_lock);
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
@@ -484,6 +512,7 @@ static void drive_info_del(DriveInfo *dinfo)
int blk_get_refcnt(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? blk->refcnt : 0;
}
@@ -494,6 +523,7 @@ int blk_get_refcnt(BlockBackend *blk)
void blk_ref(BlockBackend *blk)
{
assert(blk->refcnt > 0);
+ GLOBAL_STATE_CODE();
blk->refcnt++;
}
@@ -504,6 +534,7 @@ void blk_ref(BlockBackend *blk)
*/
void blk_unref(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk) {
assert(blk->refcnt > 0);
if (blk->refcnt > 1) {
@@ -524,6 +555,7 @@ void blk_unref(BlockBackend *blk)
*/
BlockBackend *blk_all_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, link)
: QTAILQ_FIRST(&block_backends);
}
@@ -532,14 +564,12 @@ void blk_remove_all_bs(void)
{
BlockBackend *blk = NULL;
- while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *ctx = blk_get_aio_context(blk);
+ GLOBAL_STATE_CODE();
- aio_context_acquire(ctx);
+ while ((blk = blk_all_next(blk)) != NULL) {
if (blk->root) {
blk_remove_bs(blk);
}
- aio_context_release(ctx);
}
}
@@ -555,6 +585,7 @@ void blk_remove_all_bs(void)
*/
BlockBackend *blk_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, monitor_link)
: QTAILQ_FIRST(&monitor_block_backends);
}
@@ -568,14 +599,14 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it)
/* Must be called from the main loop */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ old_bs = it->bs;
+
/* First, return all root nodes of BlockBackends. In order to avoid
* returning a BDS twice when multiple BBs refer to it, we only return it
* if the BB is the first one in the parent list of the BDS. */
if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
BlockBackend *old_blk = it->blk;
- old_bs = old_blk ? blk_bs(old_blk) : NULL;
-
do {
it->blk = blk_all_next(it->blk);
bs = it->blk ? blk_bs(it->blk) : NULL;
@@ -589,11 +620,10 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it)
if (bs) {
bdrv_ref(bs);
bdrv_unref(old_bs);
+ it->bs = bs;
return bs;
}
it->phase = BDRV_NEXT_MONITOR_OWNED;
- } else {
- old_bs = it->bs;
}
/* Then return the monitor-owned BDSes without a BB attached. Ignore all
@@ -621,6 +651,7 @@ static void bdrv_next_reset(BdrvNextIterator *it)
BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
+ GLOBAL_STATE_CODE();
bdrv_next_reset(it);
return bdrv_next(it);
}
@@ -632,13 +663,10 @@ void bdrv_next_cleanup(BdrvNextIterator *it)
/* Must be called from the main loop */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
- if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
- if (it->blk) {
- bdrv_unref(blk_bs(it->blk));
- blk_unref(it->blk);
- }
- } else {
- bdrv_unref(it->bs);
+ bdrv_unref(it->bs);
+
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS && it->blk) {
+ blk_unref(it->blk);
}
bdrv_next_reset(it);
@@ -658,6 +686,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
assert(!blk->name);
assert(name && name[0]);
+ GLOBAL_STATE_CODE();
if (!id_wellformed(name)) {
error_setg(errp, "Invalid device name");
@@ -685,6 +714,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
*/
void monitor_remove_blk(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
+
if (!blk->name) {
return;
}
@@ -700,6 +731,7 @@ void monitor_remove_blk(BlockBackend *blk)
*/
const char *blk_name(const BlockBackend *blk)
{
+ IO_CODE();
return blk->name ?: "";
}
@@ -711,6 +743,7 @@ BlockBackend *blk_by_name(const char *name)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
assert(name);
while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
@@ -725,12 +758,17 @@ BlockBackend *blk_by_name(const char *name)
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
+ IO_CODE();
return blk->root ? blk->root->bs : NULL;
}
-static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs)
{
BdrvChild *child;
+
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(child, &bs->parents, next_parent) {
if (child->klass == &child_root) {
return child->opaque;
@@ -745,6 +783,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
*/
bool bdrv_has_blk(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bdrv_first_blk(bs) != NULL;
}
@@ -755,6 +794,9 @@ bool bdrv_is_root_node(BlockDriverState *bs)
{
BdrvChild *c;
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass != &child_root) {
return false;
@@ -769,6 +811,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
*/
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo;
}
@@ -780,6 +823,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
assert(!blk->legacy_dinfo);
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo = dinfo;
}
@@ -790,6 +834,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
@@ -804,6 +849,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
*/
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->public;
}
@@ -812,6 +858,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk)
*/
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
+ GLOBAL_STATE_CODE();
return container_of(public, BlockBackend, public);
}
@@ -821,16 +868,24 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
void blk_remove_bs(BlockBackend *blk)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- BlockDriverState *bs;
BdrvChild *root;
+ GLOBAL_STATE_CODE();
+
notifier_list_notify(&blk->remove_bs_notifiers, blk);
if (tgm->throttle_state) {
- bs = blk_bs(blk);
+ BlockDriverState *bs = blk_bs(blk);
+
+ /*
+ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
+ * example, if a temporary filter node is removed by a blockjob.
+ */
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
blk_update_root_state(blk);
@@ -842,7 +897,10 @@ void blk_remove_bs(BlockBackend *blk)
blk_drain(blk);
root = blk->root;
blk->root = NULL;
+
+ bdrv_graph_wrlock();
bdrv_root_unref_child(root);
+ bdrv_graph_wrunlock();
}
/*
@@ -851,11 +909,15 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+
+ GLOBAL_STATE_CODE();
bdrv_ref(bs);
+ bdrv_graph_wrlock();
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
blk->perm, blk->shared_perm,
blk, errp);
+ bdrv_graph_wrunlock();
if (blk->root == NULL) {
return -EPERM;
}
@@ -874,16 +936,19 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
*/
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_replace_child_bs(blk->root, new_bs, errp);
}
/*
* Sets the permission bitmasks that the user of the BlockBackend needs.
*/
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
- Error **errp)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
{
int ret;
+ GLOBAL_STATE_CODE();
if (blk->root && !blk->disable_perm) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
@@ -898,8 +963,18 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
return 0;
}
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
+{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ return blk_set_perm_locked(blk, perm, shared_perm, errp);
+}
+
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
+ GLOBAL_STATE_CODE();
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
@@ -910,6 +985,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
*/
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
+ GLOBAL_STATE_CODE();
if (blk->dev) {
return -EBUSY;
}
@@ -935,10 +1011,10 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
{
assert(blk->dev == dev);
+ GLOBAL_STATE_CODE();
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
- blk->guest_block_size = 512;
blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -948,6 +1024,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
*/
DeviceState *blk_get_attached_dev(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->dev;
}
@@ -956,6 +1033,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk)
char *blk_get_attached_dev_id(BlockBackend *blk)
{
DeviceState *dev = blk->dev;
+ IO_CODE();
if (!dev) {
return g_strdup("");
@@ -976,6 +1054,8 @@ BlockBackend *blk_by_dev(void *dev)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
assert(dev != NULL);
while ((blk = blk_all_next(blk)) != NULL) {
if (blk->dev == dev) {
@@ -993,11 +1073,12 @@ BlockBackend *blk_by_dev(void *dev)
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void *opaque)
{
+ GLOBAL_STATE_CODE();
blk->dev_ops = ops;
blk->dev_opaque = opaque;
/* Are we currently quiesced? Should we enforce this right now? */
- if (blk->quiesce_counter && ops->drained_begin) {
+ if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
ops->drained_begin(opaque);
}
}
@@ -1014,6 +1095,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
@@ -1046,6 +1128,7 @@ static void blk_root_change_media(BdrvChild *child, bool load)
*/
bool blk_dev_has_removable_media(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}
@@ -1054,6 +1137,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
*/
bool blk_dev_has_tray(BlockBackend *blk)
{
+ IO_CODE();
return blk->dev_ops && blk->dev_ops->is_tray_open;
}
@@ -1063,6 +1147,7 @@ bool blk_dev_has_tray(BlockBackend *blk)
*/
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
}
@@ -1073,6 +1158,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
+ IO_CODE();
if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
@@ -1085,6 +1171,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk)
*/
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
return blk->dev_ops->is_medium_locked(blk->dev_opaque);
}
@@ -1105,6 +1192,7 @@ static void blk_root_resize(BdrvChild *child)
void blk_iostatus_enable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = true;
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1113,6 +1201,7 @@ void blk_iostatus_enable(BlockBackend *blk)
* enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
+ IO_CODE();
return (blk->iostatus_enabled &&
(blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
@@ -1121,16 +1210,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk)
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->iostatus;
}
void blk_iostatus_disable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = false;
}
void blk_iostatus_reset(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk_iostatus_is_enabled(blk)) {
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1138,6 +1230,7 @@ void blk_iostatus_reset(BlockBackend *blk)
void blk_iostatus_set_err(BlockBackend *blk, int error)
{
+ IO_CODE();
assert(blk_iostatus_is_enabled(blk));
if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -1147,29 +1240,32 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_write_beyond_eof = allow;
}
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_aio_context_change = allow;
}
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
- blk->disable_request_queuing = disable;
+ IO_CODE();
+ qatomic_set(&blk->disable_request_queuing, disable);
}
-static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
- size_t size)
+static int coroutine_fn GRAPH_RDLOCK
+blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int64_t len;
- if (size > INT_MAX) {
+ if (bytes < 0) {
return -EIO;
}
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1178,12 +1274,12 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
}
if (!blk->allow_write_beyond_eof) {
- len = blk_getlength(blk);
+ len = bdrv_co_getlength(blk_bs(blk));
if (len < 0) {
return len;
}
- if (offset > len || len - offset < size) {
+ if (offset > len || len - offset < bytes) {
return -EIO;
}
}
@@ -1191,27 +1287,45 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
+/* Are we currently in a drained section? */
+bool blk_in_drain(BlockBackend *blk)
+{
+ GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
+ return qatomic_read(&blk->quiesce_counter);
+}
+
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
assert(blk->in_flight > 0);
- if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ if (qatomic_read(&blk->quiesce_counter) &&
+ !qatomic_read(&blk->disable_request_queuing)) {
+ /*
+ * Take lock before decrementing in flight counter so main loop thread
+ * waits for us to enqueue ourselves before it can leave the drained
+ * section.
+ */
+ qemu_mutex_lock(&blk->queued_requests_lock);
blk_dec_in_flight(blk);
- qemu_co_queue_wait(&blk->queued_requests, NULL);
+ qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
blk_inc_in_flight(blk);
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
-blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags)
+blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
bs = blk_bs(blk);
@@ -1227,22 +1341,49 @@ blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
/* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
- bytes, false);
+ bytes, THROTTLE_READ);
}
- ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
+ flags);
bdrv_dec_in_flight(bs);
return ret;
}
+int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
+ void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+}
+
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
+ int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
+
+ blk_inc_in_flight(blk);
+ ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
+int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset, BdrvRequestFlags flags)
+{
+ int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
- ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
+ ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags);
blk_dec_in_flight(blk);
return ret;
@@ -1250,14 +1391,16 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
-blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags)
+blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
bs = blk_bs(blk);
@@ -1272,7 +1415,7 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
/* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
- bytes, true);
+ bytes, THROTTLE_WRITE);
}
if (!blk->enable_write_cache) {
@@ -1286,100 +1429,85 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
}
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
- unsigned int bytes,
+ int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
- ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
+ ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
blk_dec_in_flight(blk);
return ret;
}
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
+ const void *buf, BdrvRequestFlags flags)
{
- return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
-}
-
-typedef struct BlkRwCo {
- BlockBackend *blk;
- int64_t offset;
- void *iobuf;
- int ret;
- BdrvRequestFlags flags;
-} BlkRwCo;
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
-static void blk_read_entry(void *opaque)
-{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
+ assert(bytes <= SIZE_MAX);
- rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
- aio_wait_kick();
+ return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
}
-static void blk_write_entry(void *opaque)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
-
- rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
- qiov, 0, rwco->flags);
- aio_wait_kick();
+ IO_OR_GS_CODE();
+ return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
}
-static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
- int64_t bytes, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
+ BlockDriverState *base,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
- BlkRwCo rwco = {
- .blk = blk,
- .offset = offset,
- .iobuf = &qiov,
- .flags = flags,
- .ret = NOT_DONE,
- };
-
- blk_inc_in_flight(blk);
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- co_entry(&rwco);
- } else {
- Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
- bdrv_coroutine_enter(blk_bs(blk), co);
- BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
- }
- blk_dec_in_flight(blk);
-
- return rwco.ret;
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+ return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
+ map, file);
}
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
+ BlockDriverState *base,
+ bool include_base, int64_t offset,
+ int64_t bytes, int64_t *pnum)
{
- return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
- flags | BDRV_REQ_ZERO_WRITE);
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+ return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
+ bytes, pnum);
}
+typedef struct BlkRwCo {
+ BlockBackend *blk;
+ int64_t offset;
+ void *iobuf;
+ int ret;
+ BdrvRequestFlags flags;
+} BlkRwCo;
+
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
+ GLOBAL_STATE_CODE();
return bdrv_make_zero(blk->root, flags);
}
void blk_inc_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_inc(&blk->in_flight);
}
void blk_dec_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_dec(&blk->in_flight);
aio_wait_kick();
}
@@ -1398,13 +1526,14 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
void *opaque, int ret)
{
struct BlockBackendAIOCB *acb;
+ IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
error_callback_bh, acb);
return &acb->common;
}
@@ -1412,20 +1541,12 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
typedef struct BlkAioEmAIOCB {
BlockAIOCB common;
BlkRwCo rwco;
- int bytes;
+ int64_t bytes;
bool has_returned;
} BlkAioEmAIOCB;
-static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
-{
- BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
-
- return blk_get_aio_context(acb->rwco.blk);
-}
-
static const AIOCBInfo blk_aio_em_aiocb_info = {
.aiocb_size = sizeof(BlkAioEmAIOCB),
- .get_aio_context = blk_aio_em_aiocb_get_aio_context,
};
static void blk_aio_complete(BlkAioEmAIOCB *acb)
@@ -1444,7 +1565,8 @@ static void blk_aio_complete_bh(void *opaque)
blk_aio_complete(acb);
}
-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
+ int64_t bytes,
void *iobuf, CoroutineEntry co_entry,
BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
@@ -1465,100 +1587,117 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
- bdrv_coroutine_enter(blk_bs(blk), co);
+ aio_co_enter(qemu_get_current_aio_context(), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
blk_aio_complete_bh, acb);
}
return &acb->common;
}
-static void blk_aio_read_entry(void *opaque)
+static void coroutine_fn blk_aio_read_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
assert(qiov->size == acb->bytes);
- rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
+ 0, rwco->flags);
blk_aio_complete(acb);
}
-static void blk_aio_write_entry(void *opaque)
+static void coroutine_fn blk_aio_write_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
assert(!qiov || qiov->size == acb->bytes);
- rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
- qiov, 0, rwco->flags);
+ rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+ qiov, 0, rwco->flags);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags,
+ int64_t bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ IO_CODE();
+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
+int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
{
- int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
- if (ret < 0) {
- return ret;
- }
- return count;
-}
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
- BdrvRequestFlags flags)
-{
- int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
- flags);
- if (ret < 0) {
- return ret;
+ if (!blk_co_is_available(blk)) {
+ return -ENOMEDIUM;
}
- return count;
+
+ return bdrv_co_getlength(blk_bs(blk));
}
-int64_t blk_getlength(BlockBackend *blk)
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
{
- if (!blk_is_available(blk)) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+
+ if (!bs) {
return -ENOMEDIUM;
+ } else {
+ return bdrv_co_nb_sectors(bs);
}
-
- return bdrv_getlength(blk_bs(blk));
}
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path,
+ * via blk_get_geometry.
+ */
+int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
{
- if (!blk_bs(blk)) {
- *nb_sectors_ptr = 0;
+ BlockDriverState *bs = blk_bs(blk);
+
+ IO_CODE();
+
+ if (!bs) {
+ return -ENOMEDIUM;
} else {
- bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
+ return bdrv_nb_sectors(bs);
}
}
-int64_t blk_nb_sectors(BlockBackend *blk)
+/* return 0 as number of sectors if no device present or error */
+void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
+ uint64_t *nb_sectors_ptr)
{
- if (!blk_is_available(blk)) {
- return -ENOMEDIUM;
- }
+ int64_t ret = blk_co_nb_sectors(blk);
+ *nb_sectors_ptr = ret < 0 ? 0 : ret;
+}
- return bdrv_nb_sectors(blk_bs(blk));
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path.
+ */
+void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
+ uint64_t *nb_sectors_ptr)
+{
+ int64_t ret = blk_nb_sectors(blk);
+ *nb_sectors_ptr = ret < 0 ? 0 : ret;
}
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
+ assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_read_entry, flags, cb, opaque);
}
@@ -1567,53 +1706,59 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
+ assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_write_entry, flags, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
{
+ GLOBAL_STATE_CODE();
bdrv_aio_cancel(acb);
}
void blk_aio_cancel_async(BlockAIOCB *acb)
{
+ IO_CODE();
bdrv_aio_cancel_async(acb);
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
-blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ IO_CODE();
+
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_co_ioctl(blk_bs(blk), req, buf);
}
-static void blk_ioctl_entry(void *opaque)
+int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
+ void *buf)
{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
+ int ret;
+ IO_OR_GS_CODE();
- rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
- aio_wait_kick();
-}
+ blk_inc_in_flight(blk);
+ ret = blk_co_do_ioctl(blk, req, buf);
+ blk_dec_in_flight(blk);
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
-{
- return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+ return ret;
}
-static void blk_aio_ioctl_entry(void *opaque)
+static void coroutine_fn blk_aio_ioctl_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+ rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
blk_aio_complete(acb);
}
@@ -1621,16 +1766,19 @@ static void blk_aio_ioctl_entry(void *opaque)
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
-blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int ret;
+ IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
@@ -1640,112 +1788,294 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
return bdrv_co_pdiscard(blk->root, offset, bytes);
}
-static void blk_aio_pdiscard_entry(void *opaque)
+static void coroutine_fn blk_aio_pdiscard_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+ rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
- int64_t offset, int bytes,
+ int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+ int64_t bytes)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
- ret = blk_do_pdiscard(blk, offset, bytes);
+ ret = blk_co_do_pdiscard(blk, offset, bytes);
blk_dec_in_flight(blk);
return ret;
}
-static void blk_pdiscard_entry(void *opaque)
-{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
-
- rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
- aio_wait_kick();
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-{
- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
-}
-
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
-static int coroutine_fn blk_do_flush(BlockBackend *blk)
+static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
+ IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_co_flush(blk_bs(blk));
}
-static void blk_aio_flush_entry(void *opaque)
+static void coroutine_fn blk_aio_flush_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_do_flush(rwco->blk);
+ rwco->ret = blk_co_do_flush(rwco->blk);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
int coroutine_fn blk_co_flush(BlockBackend *blk)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
- ret = blk_do_flush(blk);
+ ret = blk_co_do_flush(blk);
blk_dec_in_flight(blk);
return ret;
}
-static void blk_flush_entry(void *opaque)
+static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
- BlkRwCo *rwco = opaque;
- rwco->ret = blk_do_flush(rwco->blk);
- aio_wait_kick();
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
+ (unsigned int*)(uintptr_t)acb->bytes,
+ rwco->iobuf);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
+ unsigned int *nr_zones,
+ BlockZoneDescriptor *zones,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .iobuf = zones,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = (int64_t)(uintptr_t)nr_zones,
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
+ aio_co_enter(qemu_get_current_aio_context(), co);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
}
-int blk_flush(BlockBackend *blk)
+static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
- return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_mgmt(rwco->blk,
+ (BlockZoneOp)(uintptr_t)rwco->iobuf,
+ rwco->offset, acb->bytes);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+ int64_t offset, int64_t len,
+ BlockCompletionFunc *cb, void *opaque) {
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .iobuf = (void *)(uintptr_t)op,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = len;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
+ aio_co_enter(qemu_get_current_aio_context(), co);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
+ rwco->iobuf, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque) {
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .ret = NOT_DONE,
+ .flags = flags,
+ .iobuf = qiov,
+ };
+ acb->bytes = (int64_t)(uintptr_t)offset;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
+ aio_co_enter(qemu_get_current_aio_context(), co);
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
+}
+
+/*
+ * Send a zone_report command.
+ * offset is a byte offset from the start of the device. No alignment
+ * required for offset.
+ * nr_zones represents IN maximum and OUT actual.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+ unsigned int *nr_zones,
+ BlockZoneDescriptor *zones)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk); /* increase before waiting */
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_is_available(blk)) {
+ blk_dec_in_flight(blk);
+ return -ENOMEDIUM;
+ }
+ ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
+ blk_dec_in_flight(blk);
+ return ret;
+}
+
+/*
+ * Send a zone_management command.
+ * op is the zone operation;
+ * offset is the byte offset from the start of the zoned device;
+ * len is the maximum number of bytes the command should operate on. It
+ * should be aligned with the device zone size.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+ int64_t offset, int64_t len)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+
+ ret = blk_check_byte_request(blk, offset, len);
+ if (ret < 0) {
+ blk_dec_in_flight(blk);
+ return ret;
+ }
+
+ ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
+ blk_dec_in_flight(blk);
+ return ret;
+}
+
+/*
+ * Send a zone_append command.
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_is_available(blk)) {
+ blk_dec_in_flight(blk);
+ return -ENOMEDIUM;
+ }
+
+ ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
+ blk_dec_in_flight(blk);
+ return ret;
}
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
}
/* We may have -ENOMEDIUM completions in flight */
AIO_WAIT_WHILE(blk_get_aio_context(blk),
- qatomic_mb_read(&blk->in_flight) > 0);
+ qatomic_read(&blk->in_flight) > 0);
if (bs) {
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
}
@@ -1753,17 +2083,13 @@ void blk_drain_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *ctx = blk_get_aio_context(blk);
-
- aio_context_acquire(ctx);
-
/* We may have -ENOMEDIUM completions in flight */
- AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
-
- aio_context_release(ctx);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
}
bdrv_drain_all_end();
@@ -1772,12 +2098,14 @@ void blk_drain_all(void)
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
+ GLOBAL_STATE_CODE();
blk->on_read_error = on_read_error;
blk->on_write_error = on_write_error;
}
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
+ IO_CODE();
return is_read ? blk->on_read_error : blk->on_write_error;
}
@@ -1785,6 +2113,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+ IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -1810,7 +2139,7 @@ static void send_qmp_error_event(BlockBackend *blk,
BlockDriverState *bs = blk_bs(blk);
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
- qapi_event_send_block_io_error(blk_name(blk), !!bs,
+ qapi_event_send_block_io_error(blk_name(blk),
bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error));
@@ -1824,6 +2153,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
assert(error >= 0);
+ IO_CODE();
if (action == BLOCK_ERROR_ACTION_STOP) {
/* First set the iostatus, so that "info block" returns an iostatus
@@ -1855,6 +2185,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool blk_supports_write_perm(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return !bdrv_is_read_only(bs);
@@ -1869,12 +2200,14 @@ bool blk_supports_write_perm(BlockBackend *blk)
*/
bool blk_is_writable(BlockBackend *blk)
{
+ IO_CODE();
return blk->perm & BLK_PERM_WRITE;
}
bool blk_is_sg(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
return false;
@@ -1885,54 +2218,73 @@ bool blk_is_sg(BlockBackend *blk)
bool blk_enable_write_cache(BlockBackend *blk)
{
+ IO_CODE();
return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
+ IO_CODE();
blk->enable_write_cache = wce;
}
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
+void blk_activate(BlockBackend *blk, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
- bdrv_invalidate_cache(bs, errp);
+ /*
+ * Migration code can call this function in coroutine context, so leave
+ * coroutine context if necessary.
+ */
+ if (qemu_in_coroutine()) {
+ bdrv_co_activate(bs, errp);
+ } else {
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_activate(bs, errp);
+ }
}
-bool blk_is_inserted(BlockBackend *blk)
+bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
+ assert_bdrv_graph_readable();
- return bs && bdrv_is_inserted(bs);
+ return bs && bdrv_co_is_inserted(bs);
}
-bool blk_is_available(BlockBackend *blk)
+bool coroutine_fn blk_co_is_available(BlockBackend *blk)
{
- return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
+ IO_CODE();
+ return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
-void blk_lock_medium(BlockBackend *blk, bool locked)
+void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
- bdrv_lock_medium(bs, locked);
+ bdrv_co_lock_medium(bs, locked);
}
}
-void blk_eject(BlockBackend *blk, bool eject_flag)
+void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
{
BlockDriverState *bs = blk_bs(blk);
char *id;
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
- bdrv_eject(bs, eject_flag);
+ bdrv_co_eject(bs, eject_flag);
}
/* Whether or not we ejected on the backend,
@@ -1946,6 +2298,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
int blk_get_flags(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return bdrv_get_flags(bs);
@@ -1958,6 +2311,7 @@ int blk_get_flags(BlockBackend *blk)
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}
@@ -1966,6 +2320,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint64_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
@@ -1979,6 +2334,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint32_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_transfer);
@@ -1986,29 +2342,36 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
return ROUND_DOWN(max, blk_get_request_alignment(blk));
}
-int blk_get_max_iov(BlockBackend *blk)
+int blk_get_max_hw_iov(BlockBackend *blk)
{
- return blk->root->bs->bl.max_iov;
+ IO_CODE();
+ return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
+ blk->root->bs->bl.max_iov);
}
-void blk_set_guest_block_size(BlockBackend *blk, int align)
+int blk_get_max_iov(BlockBackend *blk)
{
- blk->guest_block_size = align;
+ IO_CODE();
+ return blk->root->bs->bl.max_iov;
}
void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!bs) {
return false;
@@ -2020,6 +2383,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock(bs, op, reason);
@@ -2029,6 +2393,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_block_all(bs, reason);
@@ -2038,96 +2403,119 @@ void blk_op_block_all(BlockBackend *blk, Error *reason)
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock_all(bs, reason);
}
}
+/**
+ * Return BB's current AioContext. Note that this context may change
+ * concurrently at any time, with one exception: If the BB has a root node
+ * attached, its context will only change through bdrv_try_change_aio_context(),
+ * which creates a drained section. Therefore, incrementing such a BB's
+ * in-flight counter will prevent its context from changing.
+ */
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
- if (bs) {
- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
- assert(ctx == blk->ctx);
+ if (!blk) {
+ return qemu_get_aio_context();
}
- return blk->ctx;
-}
-
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
- BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
- return blk_get_aio_context(blk_acb->blk);
+ return qatomic_read(&blk->ctx);
}
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
- bool update_root_node, Error **errp)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+ Error **errp)
{
+ bool old_allow_change;
BlockDriverState *bs = blk_bs(blk);
- ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
int ret;
- if (bs) {
- if (update_root_node) {
- ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
- errp);
- if (ret < 0) {
- return ret;
- }
- }
- if (tgm->throttle_state) {
- bdrv_drained_begin(bs);
- throttle_group_detach_aio_context(tgm);
- throttle_group_attach_aio_context(tgm, new_context);
- bdrv_drained_end(bs);
- }
+ GLOBAL_STATE_CODE();
+
+ if (!bs) {
+ qatomic_set(&blk->ctx, new_context);
+ return 0;
}
- blk->ctx = new_context;
- return 0;
+ bdrv_ref(bs);
+
+ old_allow_change = blk->allow_aio_context_change;
+ blk->allow_aio_context_change = true;
+
+ ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+
+ blk->allow_aio_context_change = old_allow_change;
+
+ bdrv_unref(bs);
+ return ret;
}
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
- Error **errp)
+typedef struct BdrvStateBlkRootContext {
+ AioContext *new_ctx;
+ BlockBackend *blk;
+} BdrvStateBlkRootContext;
+
+static void blk_root_set_aio_ctx_commit(void *opaque)
{
- return blk_do_set_aio_context(blk, new_context, true, errp);
+ BdrvStateBlkRootContext *s = opaque;
+ BlockBackend *blk = s->blk;
+ AioContext *new_context = s->new_ctx;
+ ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+
+ qatomic_set(&blk->ctx, new_context);
+ if (tgm->throttle_state) {
+ throttle_group_detach_aio_context(tgm);
+ throttle_group_attach_aio_context(tgm, new_context);
+ }
}
-static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp)
+static TransactionActionDrv set_blk_root_context = {
+ .commit = blk_root_set_aio_ctx_commit,
+ .clean = g_free,
+};
+
+static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp)
{
BlockBackend *blk = child->opaque;
+ BdrvStateBlkRootContext *s;
- if (blk->allow_aio_context_change) {
- return true;
+ if (!blk->allow_aio_context_change) {
+ /*
+ * Manually created BlockBackends (those with a name) that are not
+ * attached to anything can change their AioContext without updating
+ * their user; return an error for others.
+ */
+ if (!blk->name || blk->dev) {
+ /* TODO Add BB name/QOM path */
+ error_setg(errp, "Cannot change iothread of active block backend");
+ return false;
+ }
}
- /* Only manually created BlockBackends that are not attached to anything
- * can change their AioContext without updating their user. */
- if (!blk->name || blk->dev) {
- /* TODO Add BB name/QOM path */
- error_setg(errp, "Cannot change iothread of active block backend");
- return false;
- }
+ s = g_new(BdrvStateBlkRootContext, 1);
+ *s = (BdrvStateBlkRootContext) {
+ .new_ctx = ctx,
+ .blk = blk,
+ };
+ tran_add(tran, &set_blk_root_context, s);
return true;
}
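A hedged sketch of the caller side of this hook, assuming the generic Transaction helpers (tran_new()/tran_commit()/tran_abort()); in the real tree the driver is bdrv_try_change_aio_context(), and example_change_child_ctx() is made up for illustration:
static int example_change_child_ctx(BdrvChild *child, AioContext *ctx,
                                    Error **errp)
{
    Transaction *tran = tran_new();
    GHashTable *visited = g_hash_table_new(NULL, NULL);
    bool ok = blk_root_change_aio_ctx(child, ctx, visited, tran, errp);

    g_hash_table_destroy(visited);
    if (ok) {
        tran_commit(tran);  /* runs blk_root_set_aio_ctx_commit(), then g_free(s) */
        return 0;
    }
    tran_abort(tran);       /* no .abort handler, so only the g_free(s) clean runs */
    return -EPERM;
}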
-static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
- GSList **ignore)
-{
- BlockBackend *blk = child->opaque;
- blk_do_set_aio_context(blk, ctx, false, &error_abort);
-}
-
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
notifier = g_new(BlockBackendAioNotifier, 1);
notifier->attached_aio_context = attached_aio_context;
@@ -2150,6 +2538,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
+
if (bs) {
bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
@@ -2170,72 +2560,65 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->remove_bs_notifiers, notify);
}
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
-void blk_io_plug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bs) {
- bdrv_io_plug(bs);
- }
-}
-
-void blk_io_unplug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bs) {
- bdrv_io_unplug(bs);
- }
-}
-
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
+ IO_CODE();
return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+ int64_t bytes, BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_co_pwritev(blk, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
- int count)
+int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
+ int64_t bytes, const void *buf)
{
- return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
- BDRV_REQ_WRITE_COMPRESSED);
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+ return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0,
+ BDRV_REQ_WRITE_COMPRESSED);
}
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
+int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags,
+ Error **errp)
{
- if (!blk_is_available(blk)) {
+ IO_OR_GS_CODE();
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_co_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
- return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
+ return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
int ret;
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
@@ -2255,6 +2638,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2264,6 +2648,9 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2273,6 +2660,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2286,6 +2674,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
*/
void blk_update_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
assert(blk->root);
blk->root_state.open_flags = blk->root->bs->open_flags;
@@ -2298,6 +2687,7 @@ void blk_update_root_state(BlockBackend *blk)
*/
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.detect_zeroes;
}
@@ -2307,33 +2697,33 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
*/
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.open_flags;
}
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->root_state;
}
int blk_commit_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *aio_context = blk_get_aio_context(blk);
BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
- aio_context_acquire(aio_context);
if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
int ret;
ret = bdrv_commit(unfiltered_bs);
if (ret < 0) {
- aio_context_release(aio_context);
return ret;
}
}
- aio_context_release(aio_context);
}
return 0;
}
@@ -2342,6 +2732,7 @@ int blk_commit_all(void)
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
+ GLOBAL_STATE_CODE();
throttle_group_config(&blk->public.throttle_group_member, cfg);
}
@@ -2350,12 +2741,15 @@ void blk_io_limits_disable(BlockBackend *blk)
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
assert(tgm->throttle_state);
+ GLOBAL_STATE_CODE();
if (bs) {
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
}
throttle_group_unregister_tgm(tgm);
if (bs) {
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
}
@@ -2363,12 +2757,14 @@ void blk_io_limits_disable(BlockBackend *blk)
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
assert(!blk->public.throttle_group_member.throttle_state);
+ GLOBAL_STATE_CODE();
throttle_group_register_tgm(&blk->public.throttle_group_member,
group, blk_get_aio_context(blk));
}
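As a hedged illustration of how these throttling entry points combine, assuming the ThrottleConfig layout from qemu/throttle.h; example_cap_iops() and the group name are invented:
static void example_cap_iops(BlockBackend *blk, uint64_t iops)
{
    ThrottleConfig cfg;

    GLOBAL_STATE_CODE();

    blk_io_limits_enable(blk, "example-group");  /* join the throttle group first */

    throttle_config_init(&cfg);
    cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops;  /* steady-state total IOPS cap */
    blk_set_io_limits(blk, &cfg);
}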
void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
+ GLOBAL_STATE_CODE();
/* this BB is not part of any group */
if (!blk->public.throttle_group_member.throttle_state) {
return;
@@ -2390,7 +2786,7 @@ static void blk_root_drained_begin(BdrvChild *child)
BlockBackend *blk = child->opaque;
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- if (++blk->quiesce_counter == 1) {
+ if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
if (blk->dev_ops && blk->dev_ops->drained_begin) {
blk->dev_ops->drained_begin(blk->dev_opaque);
}
@@ -2408,7 +2804,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
bool busy = false;
- assert(blk->quiesce_counter);
+ assert(qatomic_read(&blk->quiesce_counter));
if (blk->dev_ops && blk->dev_ops->drained_poll) {
busy = blk->dev_ops->drained_poll(blk->dev_opaque);
@@ -2416,40 +2812,59 @@ static bool blk_root_drained_poll(BdrvChild *child)
return busy || !!blk->in_flight;
}
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
+static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
- assert(blk->quiesce_counter);
+ assert(qatomic_read(&blk->quiesce_counter));
assert(blk->public.throttle_group_member.io_limits_disabled);
qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
- if (--blk->quiesce_counter == 0) {
+ if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
- while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ qemu_mutex_lock(&blk->queued_requests_lock);
+ while (qemu_co_enter_next(&blk->queued_requests,
+ &blk->queued_requests_lock)) {
/* Resume all queued requests */
}
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
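The resume loop above pairs with the coroutine side that parks requests while the backend is quiesced. A hedged sketch of that pattern, modelled on the blk_wait_while_drained() helper used elsewhere in this file, shows why queued_requests_lock must be held across the wait and the wakeup:
/* Assumes the caller already incremented blk->in_flight for this request. */
static void coroutine_fn example_wait_while_drained(BlockBackend *blk)
{
    if (qatomic_read(&blk->quiesce_counter) &&
        !qatomic_read(&blk->disable_request_queuing)) {
        qemu_mutex_lock(&blk->queued_requests_lock);
        blk_dec_in_flight(blk);  /* let blk_root_drained_poll() make progress */
        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
        blk_inc_in_flight(blk);
        qemu_mutex_unlock(&blk->queued_requests_lock);
    }
}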
-void blk_register_buf(BlockBackend *blk, void *host, size_t size)
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
{
- bdrv_register_buf(blk_bs(blk), host, size);
+ BlockDriverState *bs = blk_bs(blk);
+
+ GLOBAL_STATE_CODE();
+
+ if (bs) {
+ return bdrv_register_buf(bs, host, size, errp);
+ }
+ return true;
}
-void blk_unregister_buf(BlockBackend *blk, void *host)
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
{
- bdrv_unregister_buf(blk_bs(blk), host);
+ BlockDriverState *bs = blk_bs(blk);
+
+ GLOBAL_STATE_CODE();
+
+ if (bs) {
+ bdrv_unregister_buf(bs, host, size);
+ }
}
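A hedged usage sketch for the updated registration API: callers pass the same host/size pair to both functions and must cope with registration failing. example_alloc_dma_buf() is hypothetical, and the qemu_memalign()/qemu_vfree()/qemu_real_host_page_size() helpers are assumed from QEMU's osdep utilities:
static void *example_alloc_dma_buf(BlockBackend *blk, size_t size, Error **errp)
{
    void *buf = qemu_memalign(qemu_real_host_page_size(), size);

    if (!blk_register_buf(blk, buf, size, errp)) {
        qemu_vfree(buf);
        return NULL;
    }
    /* Later: blk_unregister_buf(blk, buf, size); qemu_vfree(buf); */
    return buf;
}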
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BlockBackend *blk_out, int64_t off_out,
- int bytes, BdrvRequestFlags read_flags,
+ int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
int r;
+ IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+
r = blk_check_byte_request(blk_in, off_in, bytes);
if (r) {
return r;
@@ -2458,6 +2873,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
if (r) {
return r;
}
+
return bdrv_co_copy_range(blk_in->root, off_in,
blk_out->root, off_out,
bytes, read_flags, write_flags);
@@ -2465,11 +2881,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
const BdrvChild *blk_root(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root;
}
int blk_make_empty(BlockBackend *blk, Error **errp)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;