/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"

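/* One AioHandler is allocated per file descriptor that is registered
 * with aio_set_fd_handler(); the nodes are kept on ctx->aio_handlers.
 */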
struct AioHandler
{
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    int deleted;
    void *opaque;
    QLIST_ENTRY(AioHandler) node;
};

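/* Return the handler registered for @fd, or NULL if there is none or
 * it has already been marked as deleted.
 */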
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            return node;
        }
    }

    return NULL;
}

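/* Register @io_read/@io_write as the handlers for @fd, update an
 * existing registration in place, or remove it entirely when both
 * callbacks are NULL.
 */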
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque)
{
    AioHandler *node;

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            /* If the lock is held, just mark the node as deleted */
            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->opaque = opaque;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
    }

    aio_notify(ctx);
}

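/* A minimal usage sketch (not part of this file); "my_read_cb" and
 * "my_state" are hypothetical caller-side names:
 *
 *     aio_set_fd_handler(ctx, fd, my_read_cb, NULL, my_state);  // register
 *     aio_set_fd_handler(ctx, fd, NULL, NULL, NULL);            // remove
 */

/* Like aio_set_fd_handler(), but for an EventNotifier.  The notifier
 * itself is passed as the opaque pointer, which is how the callback
 * gets back the EventNotifier it expects despite the IOHandler cast.
 */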
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
                       (IOHandler *)io_read, NULL, notifier);
}

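/* There is nothing to prepare before polling in the POSIX
 * implementation, so this is a no-op.
 */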
bool aio_prepare(AioContext *ctx)
{
    return false;
}

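/* Return true if a handler is waiting for an event that the last poll
 * reported, i.e. if a call to aio_dispatch() would make progress.
 */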
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
            return true;
        }
    }

    return false;
}

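/* Run queued bottom halves, invoke the handlers whose file descriptors
 * became ready in the last poll, and run any expired timers.  Returns
 * true if real progress was made; the internal aio_notify() wakeup
 * does not count.
 */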
bool aio_dispatch(AioContext *ctx)
{
    AioHandler *node;
    bool progress = false;

    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call poll() in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        progress = true;
    }

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
        int revents;

        ctx->walking_handlers++;

        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        tmp = node;
        node = QLIST_NEXT(node, node);

        ctx->walking_handlers--;

        if (!ctx->walking_handlers && tmp->deleted) {
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }

    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

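/* Append @node to the thread-local pollfds/nodes arrays, doubling
 * their capacity (starting at 8 entries) as needed.  The cleanup
 * notifier is registered on first use so that the arrays are freed
 * when the thread exits.
 */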
static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

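/* Poll the file descriptors registered on @ctx once and dispatch
 * whatever became ready.  With @blocking true the poll can sleep until
 * the next timer deadline, and the AioContext lock is dropped around
 * poll() whenever the timeout is non-zero so that other threads can
 * acquire the context.  Returns true if progress was made.
 */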
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    bool was_dispatching;
    int i, ret;
    bool progress;
    int64_t timeout;

    aio_context_acquire(ctx);
    was_dispatching = ctx->dispatching;
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns.
     *
     * If we're in a nested event loop, ctx->dispatching might be true.
     * In that case we can restore it just before returning, but we
     * have to clear it now.
     */
    aio_set_dispatching(ctx, !blocking);

    ctx->walking_handlers++;

    assert(npfd == 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->pfd.events) {
            add_pollfd(node);
        }
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;

    /* wait until next event */
    if (timeout) {
        aio_context_release(ctx);
    }
    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
    if (timeout) {
        aio_context_acquire(ctx);
    }

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;
    ctx->walking_handlers--;

    /* Run dispatch even if there were no readable fds to run timers */
    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    aio_set_dispatching(ctx, was_dispatching);
    aio_context_release(ctx);

    return progress;
}
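
/* Sketch of a caller's event loop (hypothetical, not part of this
 * file): block in aio_poll() until at least one event has been
 * handled, then loop.
 *
 *     while (!done) {
 *         aio_poll(ctx, true);
 *     }
 */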