aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-04-07 19:12:45 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-04-07 19:12:45 +0100
commit2f37b0222cf9274d014fcb1f211b14ee626561c9 (patch)
tree1fbda138b833507eb137fe7f5971417a5d64fd9a
parent339205e7ef370663b329e34fd9e905ca00321aa4 (diff)
parent3f6de653b946fe849330208becf79d6af7e876cb (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches: - Fix crashes and hangs related to iothreads, bdrv_drain and block jobs: - Fix some AIO context locking in jobs - Fix blk->in_flight during blk_wait_while_drained() - vpc: Don't round up already aligned BAT sizes # gpg: Signature made Tue 07 Apr 2020 15:25:24 BST # gpg: using RSA key 7F09B272C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full] # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * remotes/kevin/tags/for-upstream: vpc: Don't round up already aligned BAT sizes block: Fix blk->in_flight during blk_wait_while_drained() block: Increase BB.in_flight for coroutine and sync interfaces block-backend: Reorder flush/pdiscard function definitions backup: don't acquire aio_context in backup_clean replication: assert we own context before job_cancel_sync job: take each job's lock individually in job_txn_apply Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block/backup.c4
-rw-r--r--block/block-backend.c206
-rw-r--r--block/replication.c5
-rw-r--r--block/vpc.c2
-rw-r--r--blockdev.c9
-rw-r--r--include/sysemu/block-backend.h1
-rw-r--r--job-qmp.c9
-rw-r--r--job.c50
-rw-r--r--tests/test-blockjob.c2
9 files changed, 193 insertions, 95 deletions
diff --git a/block/backup.c b/block/backup.c
index 7430ca5883..a7a7dcaf4c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -126,11 +126,7 @@ static void backup_abort(Job *job)
static void backup_clean(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
- AioContext *aio_context = bdrv_get_aio_context(s->backup_top);
-
- aio_context_acquire(aio_context);
bdrv_backup_top_drop(s->backup_top);
- aio_context_release(aio_context);
}
void backup_do_checkpoint(BlockJob *job, Error **errp)
diff --git a/block/block-backend.c b/block/block-backend.c
index 8b8f2a80a0..38ae413826 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1140,16 +1140,22 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
+ assert(blk->in_flight > 0);
+
if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ blk_dec_in_flight(blk);
qemu_co_queue_wait(&blk->queued_requests, NULL);
+ blk_inc_in_flight(blk);
}
}
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
@@ -1178,10 +1184,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
return ret;
}
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
- unsigned int bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
@@ -1214,6 +1234,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
return ret;
}
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+ unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
@@ -1234,7 +1268,7 @@ static void blk_read_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
aio_wait_kick();
}
@@ -1244,8 +1278,8 @@ static void blk_write_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
+ qiov, 0, rwco->flags);
aio_wait_kick();
}
@@ -1262,6 +1296,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
+ blk_inc_in_flight(blk);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
@@ -1270,6 +1305,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
bdrv_coroutine_enter(blk_bs(blk), co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
+ blk_dec_in_flight(blk);
return rwco.ret;
}
@@ -1387,14 +1423,8 @@ static void blk_aio_read_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
- if (rwco->blk->quiesce_counter) {
- blk_dec_in_flight(rwco->blk);
- blk_wait_while_drained(rwco->blk);
- blk_inc_in_flight(rwco->blk);
- }
-
assert(qiov->size == acb->bytes);
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
blk_aio_complete(acb);
}
@@ -1405,15 +1435,9 @@ static void blk_aio_write_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
- if (rwco->blk->quiesce_counter) {
- blk_dec_in_flight(rwco->blk);
- blk_wait_while_drained(rwco->blk);
- blk_inc_in_flight(rwco->blk);
- }
-
assert(!qiov || qiov->size == acb->bytes);
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+ qiov, 0, rwco->flags);
blk_aio_complete(acb);
}
@@ -1488,38 +1512,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
blk_aio_write_entry, flags, cb, opaque);
}
-static void blk_aio_flush_entry(void *opaque)
-{
- BlkAioEmAIOCB *acb = opaque;
- BlkRwCo *rwco = &acb->rwco;
-
- rwco->ret = blk_co_flush(rwco->blk);
- blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque)
-{
- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
-}
-
-static void blk_aio_pdiscard_entry(void *opaque)
-{
- BlkAioEmAIOCB *acb = opaque;
- BlkRwCo *rwco = &acb->rwco;
-
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
- blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
- int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque)
-{
- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
- cb, opaque);
-}
-
void blk_aio_cancel(BlockAIOCB *acb)
{
bdrv_aio_cancel(acb);
@@ -1530,7 +1522,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
blk_wait_while_drained(blk);
@@ -1546,8 +1540,7 @@ static void blk_ioctl_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- qiov->iov[0].iov_base);
+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
aio_wait_kick();
}
@@ -1561,7 +1554,7 @@ static void blk_aio_ioctl_entry(void *opaque)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
blk_aio_complete(acb);
}
@@ -1572,7 +1565,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
int ret;
@@ -1586,7 +1581,50 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
return bdrv_co_pdiscard(blk->root, offset, bytes);
}
-int blk_co_flush(BlockBackend *blk)
+static void blk_aio_pdiscard_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+ int64_t offset, int bytes,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
+ cb, opaque);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_pdiscard(blk, offset, bytes);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
+static void blk_pdiscard_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf;
+
+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
+ aio_wait_kick();
+}
+
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn blk_do_flush(BlockBackend *blk)
{
blk_wait_while_drained(blk);
@@ -1597,10 +1635,36 @@ int blk_co_flush(BlockBackend *blk)
return bdrv_co_flush(blk_bs(blk));
}
+static void blk_aio_flush_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_do_flush(rwco->blk);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+}
+
+int coroutine_fn blk_co_flush(BlockBackend *blk)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_flush(blk);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
static void blk_flush_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- rwco->ret = blk_co_flush(rwco->blk);
+ rwco->ret = blk_do_flush(rwco->blk);
aio_wait_kick();
}
@@ -2083,20 +2147,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
}
-static void blk_pdiscard_entry(void *opaque)
-{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
-
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
- aio_wait_kick();
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-{
- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
-}
-
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
diff --git a/block/replication.c b/block/replication.c
index 413d95407d..da013c2041 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -144,12 +144,15 @@ fail:
static void replication_close(BlockDriverState *bs)
{
BDRVReplicationState *s = bs->opaque;
+ Job *commit_job;
if (s->stage == BLOCK_REPLICATION_RUNNING) {
replication_stop(s->rs, false, NULL);
}
if (s->stage == BLOCK_REPLICATION_FAILOVER) {
- job_cancel_sync(&s->commit_job->job);
+ commit_job = &s->commit_job->job;
+ assert(commit_job->aio_context == qemu_get_current_aio_context());
+ job_cancel_sync(commit_job);
}
if (s->mode == REPLICATION_MODE_SECONDARY) {
diff --git a/block/vpc.c b/block/vpc.c
index 6df75e22dc..d8141b52da 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -835,7 +835,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
/* Write the footer (twice: at the beginning and at the end) */
block_size = 0x200000;
- num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
+ num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
diff --git a/blockdev.c b/blockdev.c
index fa8630cb41..5faddaa705 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3612,7 +3612,16 @@ void qmp_block_job_finalize(const char *id, Error **errp)
}
trace_qmp_block_job_finalize(job);
+ job_ref(&job->job);
job_finalize(&job->job, errp);
+
+ /*
+ * Job's context might have changed via job_finalize (and job_txn_apply
+ * automatically acquires the new one), so make sure we release the correct
+ * one.
+ */
+ aio_context = blk_get_aio_context(job->blk);
+ job_unref(&job->job);
aio_context_release(aio_context);
}
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index b198deca0b..9bbdbd63d7 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel(BlockAIOCB *acb);
void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
diff --git a/job-qmp.c b/job-qmp.c
index fecc939ebd..f9a58832e1 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp)
}
trace_qmp_job_finalize(job);
+ job_ref(job);
job_finalize(job, errp);
+
+ /*
+ * Job's context might have changed via job_finalize (and job_txn_apply
+ * automatically acquires the new one), so make sure we release the correct
+ * one.
+ */
+ aio_context = job->aio_context;
+ job_unref(job);
aio_context_release(aio_context);
}
diff --git a/job.c b/job.c
index 134a07b92e..53be57a3a0 100644
--- a/job.c
+++ b/job.c
@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job)
}
}
-static int job_txn_apply(JobTxn *txn, int fn(Job *))
+static int job_txn_apply(Job *job, int fn(Job *))
{
- Job *job, *next;
+ AioContext *inner_ctx;
+ Job *other_job, *next;
+ JobTxn *txn = job->txn;
int rc = 0;
- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
- rc = fn(job);
+ /*
+ * Similar to job_completed_txn_abort, we take each job's lock before
+ * applying fn, but since we assume that outer_ctx is held by the caller,
+ * we need to release it here to avoid holding the lock twice - which would
+ * break AIO_WAIT_WHILE from within fn.
+ */
+ job_ref(job);
+ aio_context_release(job->aio_context);
+
+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ inner_ctx = other_job->aio_context;
+ aio_context_acquire(inner_ctx);
+ rc = fn(other_job);
+ aio_context_release(inner_ctx);
if (rc) {
break;
}
}
+
+ /*
+ * Note that job->aio_context might have been changed by calling fn, so we
+ * can't use a local variable to cache it.
+ */
+ aio_context_acquire(job->aio_context);
+ job_unref(job);
return rc;
}
@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job)
assert(job && job->txn);
/* prepare the transaction to complete */
- rc = job_txn_apply(job->txn, job_prepare);
+ rc = job_txn_apply(job, job_prepare);
if (rc) {
job_completed_txn_abort(job);
} else {
- job_txn_apply(job->txn, job_finalize_single);
+ job_txn_apply(job, job_finalize_single);
}
}
@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job)
assert(other_job->ret == 0);
}
- job_txn_apply(txn, job_transition_to_pending);
+ job_txn_apply(job, job_transition_to_pending);
/* If no jobs need manual finalization, automatically do so */
- if (job_txn_apply(txn, job_needs_finalize) == 0) {
+ if (job_txn_apply(job, job_needs_finalize) == 0) {
job_do_finalize(job);
}
}
@@ -849,9 +870,10 @@ static void job_completed(Job *job)
static void job_exit(void *opaque)
{
Job *job = (Job *)opaque;
- AioContext *ctx = job->aio_context;
+ AioContext *ctx;
- aio_context_acquire(ctx);
+ job_ref(job);
+ aio_context_acquire(job->aio_context);
/* This is a lie, we're not quiescent, but still doing the completion
* callbacks. However, completion callbacks tend to involve operations that
@@ -862,6 +884,14 @@ static void job_exit(void *opaque)
job_completed(job);
+ /*
+ * Note that calling job_completed can move the job to a different
+ * aio_context, so we cannot cache from above. job_txn_apply takes care of
+ * acquiring the new lock, and we ref/unref to avoid job_completed freeing
+ * the job underneath us.
+ */
+ ctx = job->aio_context;
+ job_unref(job);
aio_context_release(ctx);
}
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 4eeb184caf..7519847912 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -367,7 +367,9 @@ static void test_cancel_concluded(void)
aio_poll(qemu_get_aio_context(), true);
assert(job->status == JOB_STATUS_PENDING);
+ aio_context_acquire(job->aio_context);
job_finalize(job, &error_abort);
+ aio_context_release(job->aio_context);
assert(job->status == JOB_STATUS_CONCLUDED);
cancel_common(s);