aboutsummaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2015-07-21 16:07:52 +0200
committerStefan Hajnoczi <stefanha@redhat.com>2015-07-22 12:41:40 +0100
commit21a03d17f2edb1e63f7137d97ba355cc6f19d79f (patch)
tree37068ed33f88b2e8a8eae78a63d798f6f7aa4a30 /docs
parenteabc977973103527bbb8fed69c91cfaa6691f8ab (diff)
AioContext: fix broken placement of event_notifier_test_and_clear
event_notifier_test_and_clear must be called before processing events. Otherwise, an aio_poll could "eat" the notification before the main I/O thread invokes ppoll(). The main I/O thread then never wakes up. This is an example of what could happen: i/o thread vcpu thread worker thread --------------------------------------------------------------------- lock_iothread notify_me = 1 ... unlock_iothread bh->scheduled = 1 event_notifier_set lock_iothread notify_me = 3 ppoll notify_me = 1 aio_dispatch aio_bh_poll thread_pool_completion_bh bh->scheduled = 1 event_notifier_set node->io_read(node->opaque) event_notifier_test_and_clear ppoll *** hang *** "Tracing" with qemu_clock_get_ns shows pretty much the same behavior as in the previous bug, so there are no new tricks here---just stare more at the code until it is apparent. One could also use a formal model, of course. The included one shows this with three processes: notifier corresponds to a QEMU thread pool worker, temporary_waiter to a VCPU thread that invokes aio_poll(), waiter to the main I/O thread. I would be happy to say that the formal model found the bug for me, but actually I wrote it after the fact. This patch is a bit of a big hammer. The next one optimizes it, with help (this time for real rather than a posteriori :)) from another, similar formal model. Reported-by: Richard W. M. Jones <rjones@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Tested-by: Richard W.M. Jones <rjones@redhat.com> Message-id: 1437487673-23740-6-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'docs')
-rw-r--r--docs/aio_notify_bug.promela140
1 files changed, 140 insertions, 0 deletions
diff --git a/docs/aio_notify_bug.promela b/docs/aio_notify_bug.promela
new file mode 100644
index 0000000000..b3bfca1ca4
--- /dev/null
+++ b/docs/aio_notify_bug.promela
@@ -0,0 +1,140 @@
+/*
+ * This model describes a bug in aio_notify. If ctx->notifier is
+ * cleared too late, a wakeup could be lost.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This file is in the public domain. If you really want a license,
+ * the WTFPL will do.
+ *
+ * To verify the buggy version:
+ * spin -a -DBUG docs/aio_notify_bug.promela
+ * gcc -O2 pan.c
+ * ./a.out -a -f
+ *
+ * To verify the fixed version:
+ * spin -a docs/aio_notify_bug.promela
+ * gcc -O2 pan.c
+ * ./a.out -a -f
+ *
+ * Add -DCHECK_REQ to test an alternative invariant and the
+ * "notify_me" optimization.
+ */
+
+int notify_me;
+bool event;
+bool req;
+bool notifier_done;
+
+#ifdef CHECK_REQ
+#define USE_NOTIFY_ME 1
+#else
+#define USE_NOTIFY_ME 0
+#endif
+
+active proctype notifier()
+{
+ do
+ :: true -> {
+ req = 1;
+ if
+ :: !USE_NOTIFY_ME || notify_me -> event = 1;
+ :: else -> skip;
+ fi
+ }
+ :: true -> break;
+ od;
+ notifier_done = 1;
+}
+
+#ifdef BUG
+#define AIO_POLL \
+ notify_me++; \
+ if \
+ :: !req -> { \
+ if \
+ :: event -> skip; \
+ fi; \
+ } \
+ :: else -> skip; \
+ fi; \
+ notify_me--; \
+ \
+ req = 0; \
+ event = 0;
+#else
+#define AIO_POLL \
+ notify_me++; \
+ if \
+ :: !req -> { \
+ if \
+ :: event -> skip; \
+ fi; \
+ } \
+ :: else -> skip; \
+ fi; \
+ notify_me--; \
+ \
+ event = 0; \
+ req = 0;
+#endif
+
+active proctype waiter()
+{
+ do
+ :: true -> AIO_POLL;
+ od;
+}
+
+/* Same as waiter(), but disappears after a while. */
+active proctype temporary_waiter()
+{
+ do
+ :: true -> AIO_POLL;
+ :: true -> break;
+ od;
+}
+
+#ifdef CHECK_REQ
+never {
+ do
+ :: req -> goto accept_if_req_not_eventually_false;
+ :: true -> skip;
+ od;
+
+accept_if_req_not_eventually_false:
+ if
+ :: req -> goto accept_if_req_not_eventually_false;
+ fi;
+ assert(0);
+}
+
+#else
+/* There must be infinitely many transitions of event as long
+ * as the notifier does not exit.
+ *
+ * If event stayed always true, the waiters would be busy looping.
+ * If event stayed always false, the waiters would be sleeping
+ * forever.
+ */
+never {
+ do
+ :: !event -> goto accept_if_event_not_eventually_true;
+ :: event -> goto accept_if_event_not_eventually_false;
+ :: true -> skip;
+ od;
+
+accept_if_event_not_eventually_true:
+ if
+ :: !event && notifier_done -> do :: true -> skip; od;
+ :: !event && !notifier_done -> goto accept_if_event_not_eventually_true;
+ fi;
+ assert(0);
+
+accept_if_event_not_eventually_false:
+ if
+ :: event -> goto accept_if_event_not_eventually_false;
+ fi;
+ assert(0);
+}
+#endif