about | summary | refs | log | tree | commit | diff
path: root/async.c
diff options
context:
space:
mode:
author: Paolo Bonzini <pbonzini@redhat.com> 2015-07-21 16:07:52 +0200
committer: Stefan Hajnoczi <stefanha@redhat.com> 2015-07-22 12:41:40 +0100
commit: 21a03d17f2edb1e63f7137d97ba355cc6f19d79f (patch)
tree: 37068ed33f88b2e8a8eae78a63d798f6f7aa4a30 /async.c
parent: eabc977973103527bbb8fed69c91cfaa6691f8ab (diff)
AioContext: fix broken placement of event_notifier_test_and_clear
event_notifier_test_and_clear must be called before processing events.
Otherwise, an aio_poll could "eat" the notification before the main
I/O thread invokes ppoll(). The main I/O thread then never wakes up.
This is an example of what could happen:

   i/o thread       vcpu thread                     worker thread
   ---------------------------------------------------------------------
   lock_iothread
   notify_me = 1
   ...
   unlock_iothread
                                                    bh->scheduled = 1
                                                    event_notifier_set
                    lock_iothread
                    notify_me = 3
                    ppoll
                    notify_me = 1
                    aio_dispatch
                     aio_bh_poll
                      thread_pool_completion_bh
                                                    bh->scheduled = 1
                                                    event_notifier_set
                     node->io_read(node->opaque)
                      event_notifier_test_and_clear
   ppoll
   *** hang ***

"Tracing" with qemu_clock_get_ns shows pretty much the same behavior as
in the previous bug, so there are no new tricks here---just stare more
at the code until it is apparent.

One could also use a formal model, of course. The included one shows
this with three processes: notifier corresponds to a QEMU thread pool
worker, temporary_waiter to a VCPU thread that invokes aio_poll(),
waiter to the main I/O thread. I would be happy to say that the
formal model found the bug for me, but actually I wrote it after the
fact.

This patch is a bit of a big hammer. The next one optimizes it,
with help (this time for real rather than a posteriori :)) from another,
similar formal model.

Reported-by: Richard W. M. Jones <rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Message-id: 1437487673-23740-6-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'async.c')
-rw-r--r-- async.c 8
1 files changed, 7 insertions, 1 deletions
diff --git a/async.c b/async.c
index a232192148..d625e8a803 100644
--- a/async.c
+++ b/async.c
@@ -203,6 +203,8 @@ aio_ctx_check(GSource *source)
QEMUBH *bh;
atomic_and(&ctx->notify_me, ~1);
+ event_notifier_test_and_clear(&ctx->notifier);
+
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
return true;
@@ -279,6 +281,10 @@ static void aio_rfifolock_cb(void *opaque)
aio_notify(opaque);
}
+static void event_notifier_dummy_cb(EventNotifier *e)
+{
+}
+
AioContext *aio_context_new(Error **errp)
{
int ret;
@@ -293,7 +299,7 @@ AioContext *aio_context_new(Error **errp)
g_source_set_can_recurse(&ctx->source, true);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
- event_notifier_test_and_clear);
+ event_notifier_dummy_cb);
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);