aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/linux-aio.c125
1 files changed, 99 insertions, 26 deletions
diff --git a/block/linux-aio.c b/block/linux-aio.c
index e906abebb3..62ee1ea8a5 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -59,7 +59,6 @@ struct LinuxAioState {
/* I/O completion processing */
QEMUBH *completion_bh;
- struct io_event events[MAX_EVENTS];
int event_idx;
int event_max;
};
@@ -102,6 +101,85 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
}
}
+/**
+ * aio_ring buffer which is shared between userspace and kernel.
+ *
+ * This was copied from linux/fs/aio.c; a common header does not exist
+ * but AIO exists for ages so we assume ABI is stable.
+ */
+struct aio_ring {
+ unsigned id; /* kernel internal index number */
+ unsigned nr; /* number of io_events */
+ unsigned head; /* Written to by userland or by kernel. */
+ unsigned tail;
+
+ unsigned magic;
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length; /* size of aio_ring */
+
+ struct io_event io_events[0];
+};
+
+/**
+ * io_getevents_peek:
+ * @ctx: AIO context
+ * @events: pointer to the events array, output value
+ *
+ * Returns the number of completed events and sets a pointer
+ * to the events array. This function does not update the internal
+ * ring buffer, only reads head and tail. When @events has been
+ * processed io_getevents_commit() must be called.
+ */
+static inline unsigned int io_getevents_peek(io_context_t ctx,
+ struct io_event **events)
+{
+ struct aio_ring *ring = (struct aio_ring *)ctx;
+ unsigned int head = ring->head, tail = ring->tail;
+ unsigned int nr;
+
+ nr = tail >= head ? tail - head : ring->nr - head;
+ *events = ring->io_events + head;
+ /* To avoid speculative loads of s->events[i] before observing tail.
+ Paired with smp_wmb() inside linux/fs/aio.c: aio_complete(). */
+ smp_rmb();
+
+ return nr;
+}
+
+/**
+ * io_getevents_commit:
+ * @ctx: AIO context
+ * @nr: the number of events on which head should be advanced
+ *
+ * Advances head of a ring buffer.
+ */
+static inline void io_getevents_commit(io_context_t ctx, unsigned int nr)
+{
+ struct aio_ring *ring = (struct aio_ring *)ctx;
+
+ if (nr) {
+ ring->head = (ring->head + nr) % ring->nr;
+ }
+}
+
+/**
+ * io_getevents_advance_and_peek:
+ * @ctx: AIO context
+ * @events: pointer to the events array, output value
+ * @nr: the number of events on which head should be advanced
+ *
+ * Advances head of a ring buffer and returns number of elements left.
+ */
+static inline unsigned int
+io_getevents_advance_and_peek(io_context_t ctx,
+ struct io_event **events,
+ unsigned int nr)
+{
+ io_getevents_commit(ctx, nr);
+ return io_getevents_peek(ctx, events);
+}
+
/* The completion BH fetches completed I/O requests and invokes their
* callbacks.
*
@@ -116,43 +194,38 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
static void qemu_laio_completion_bh(void *opaque)
{
LinuxAioState *s = opaque;
-
- /* Fetch more completion events when empty */
- if (s->event_idx == s->event_max) {
- do {
- struct timespec ts = { 0 };
- s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
- s->events, &ts);
- } while (s->event_max == -EINTR);
-
- s->event_idx = 0;
- if (s->event_max <= 0) {
- s->event_max = 0;
- return; /* no more events */
- }
- s->io_q.in_flight -= s->event_max;
- }
+ struct io_event *events;
/* Reschedule so nested event loops see currently pending completions */
qemu_bh_schedule(s->completion_bh);
- /* Process completion events */
- while (s->event_idx < s->event_max) {
- struct iocb *iocb = s->events[s->event_idx].obj;
- struct qemu_laiocb *laiocb =
+ while ((s->event_max = io_getevents_advance_and_peek(s->ctx, &events,
+ s->event_idx))) {
+ for (s->event_idx = 0; s->event_idx < s->event_max; ) {
+ struct iocb *iocb = events[s->event_idx].obj;
+ struct qemu_laiocb *laiocb =
container_of(iocb, struct qemu_laiocb, iocb);
- laiocb->ret = io_event_ret(&s->events[s->event_idx]);
- s->event_idx++;
+ laiocb->ret = io_event_ret(&events[s->event_idx]);
- qemu_laio_process_completion(laiocb);
+ /* Change counters one-by-one because we can be nested. */
+ s->io_q.in_flight--;
+ s->event_idx++;
+ qemu_laio_process_completion(laiocb);
+ }
}
+ qemu_bh_cancel(s->completion_bh);
+
+ /* If we are nested we have to notify the level above that we are done
+ * by setting event_max to zero; the upper level will then jump out of its
+ * own `for` loop. If we are the last, all counters have dropped to zero. */
+ s->event_max = 0;
+ s->event_idx = 0;
+
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
-
- qemu_bh_cancel(s->completion_bh);
}
static void qemu_laio_completion_cb(EventNotifier *e)