blob: 5bff3cd670b32437b807457d7fc19a739cf4bbc7 [file] [log] [blame]
aliguoria76bab42008-09-22 19:17:18 +00001/*
2 * QEMU aio implementation
3 *
4 * Copyright IBM, Corp. 2008
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
Paolo Bonzini6b620ca2012-01-13 17:44:23 +010012 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
aliguoria76bab42008-09-22 19:17:18 +000014 */
15
16#include "qemu-common.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010017#include "block/block.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010018#include "qemu/queue.h"
19#include "qemu/sockets.h"
aliguoria76bab42008-09-22 19:17:18 +000020
/* One registered file descriptor plus its callbacks, linked into
 * AioContext->aio_handlers.
 */
struct AioHandler
{
    GPollFD pfd;                  /* fd, requested events, last seen revents */
    IOHandler *io_read;           /* invoked on G_IO_IN | G_IO_HUP | G_IO_ERR */
    IOHandler *io_write;          /* invoked on G_IO_OUT | G_IO_ERR */
    int deleted;                  /* removal deferred while the list is walked */
    void *opaque;                 /* argument passed to both callbacks */
    bool is_external;             /* filtered through aio_node_check() in aio_poll */
    QLIST_ENTRY(AioHandler) node; /* link in AioContext->aio_handlers */
};
31
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020032static AioHandler *find_aio_handler(AioContext *ctx, int fd)
aliguoria76bab42008-09-22 19:17:18 +000033{
34 AioHandler *node;
35
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020036 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +020037 if (node->pfd.fd == fd)
Alexander Graf79d5ca52009-05-06 02:58:48 +020038 if (!node->deleted)
39 return node;
aliguoria76bab42008-09-22 19:17:18 +000040 }
41
42 return NULL;
43}
44
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020045void aio_set_fd_handler(AioContext *ctx,
46 int fd,
Fam Zhengdca21ef2015-10-23 11:08:05 +080047 bool is_external,
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020048 IOHandler *io_read,
49 IOHandler *io_write,
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020050 void *opaque)
aliguoria76bab42008-09-22 19:17:18 +000051{
52 AioHandler *node;
53
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020054 node = find_aio_handler(ctx, fd);
aliguoria76bab42008-09-22 19:17:18 +000055
56 /* Are we deleting the fd handler? */
57 if (!io_read && !io_write) {
58 if (node) {
Paolo Bonzinie3713e02012-09-24 14:57:41 +020059 g_source_remove_poll(&ctx->source, &node->pfd);
60
aliguoria76bab42008-09-22 19:17:18 +000061 /* If the lock is held, just mark the node as deleted */
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +020062 if (ctx->walking_handlers) {
aliguoria76bab42008-09-22 19:17:18 +000063 node->deleted = 1;
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +020064 node->pfd.revents = 0;
65 } else {
aliguoria76bab42008-09-22 19:17:18 +000066 /* Otherwise, delete it for real. We can't just mark it as
67 * deleted because deleted nodes are only cleaned up after
68 * releasing the walking_handlers lock.
69 */
Blue Swirl72cf2d42009-09-12 07:36:22 +000070 QLIST_REMOVE(node, node);
Anthony Liguori7267c092011-08-20 22:09:37 -050071 g_free(node);
aliguoria76bab42008-09-22 19:17:18 +000072 }
73 }
74 } else {
75 if (node == NULL) {
76 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +010077 node = g_new0(AioHandler, 1);
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +020078 node->pfd.fd = fd;
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020079 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
Paolo Bonzinie3713e02012-09-24 14:57:41 +020080
81 g_source_add_poll(&ctx->source, &node->pfd);
aliguoria76bab42008-09-22 19:17:18 +000082 }
83 /* Update handler with latest information */
84 node->io_read = io_read;
85 node->io_write = io_write;
aliguoria76bab42008-09-22 19:17:18 +000086 node->opaque = opaque;
Fam Zhengdca21ef2015-10-23 11:08:05 +080087 node->is_external = is_external;
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +020088
Stefan Hajnoczib5a01a72013-02-20 11:28:33 +010089 node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
90 node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
aliguoria76bab42008-09-22 19:17:18 +000091 }
Paolo Bonzini7ed2b242012-09-25 10:22:39 +020092
93 aio_notify(ctx);
aliguoria76bab42008-09-22 19:17:18 +000094}
95
Paolo Bonzinia915f4b2012-09-13 12:28:51 +020096void aio_set_event_notifier(AioContext *ctx,
97 EventNotifier *notifier,
Fam Zhengdca21ef2015-10-23 11:08:05 +080098 bool is_external,
Stefan Hajnoczif2e5dca2013-04-11 17:26:25 +020099 EventNotifierHandler *io_read)
Paolo Bonzini9958c352012-06-09 03:44:00 +0200100{
Paolo Bonzinia915f4b2012-09-13 12:28:51 +0200101 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
Fam Zhengdca21ef2015-10-23 11:08:05 +0800102 is_external, (IOHandler *)io_read, NULL, notifier);
Paolo Bonzini9958c352012-06-09 03:44:00 +0200103}
104
/* Pre-poll hook; this poll()-based implementation has nothing to
 * prepare, so always report that no events are ready in advance.
 */
bool aio_prepare(AioContext *ctx)
{
    return false;
}
109
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +0200110bool aio_pending(AioContext *ctx)
111{
112 AioHandler *node;
113
114 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
115 int revents;
116
Paolo Bonzinicd9ba1e2012-09-24 14:57:22 +0200117 revents = node->pfd.revents & node->pfd.events;
118 if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
119 return true;
120 }
121 if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
122 return true;
123 }
124 }
125
126 return false;
127}
128
/* Run queued bottom halves, invoke fd callbacks for events recorded by a
 * previous poll (see aio_poll), and run expired timers.  Returns true if
 * any callback made real progress.
 */
bool aio_dispatch(AioContext *ctx)
{
    AioHandler *node;
    bool progress = false;

    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        progress = true;
    }

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
        int revents;

        /* While this counter is nonzero, aio_set_fd_handler only marks
         * nodes deleted instead of unlinking them under our feet.
         */
        ctx->walking_handlers++;

        revents = node->pfd.revents & node->pfd.events;
        /* Consume the events so aio_pending() does not report them again. */
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        /* Advance before any free so the iterator never dangles. */
        tmp = node;
        node = QLIST_NEXT(node, node);

        ctx->walking_handlers--;

        /* Reap nodes whose deletion was deferred, once no walker remains. */
        if (!ctx->walking_handlers && tmp->deleted) {
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }

    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}
190
/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;       /* array handed to qemu_poll_ns() */
static __thread AioHandler **nodes;     /* handler that owns each pollfds[i] */
static __thread unsigned npfd, nalloc;  /* entries in use / entries allocated */
/* Runs pollfds_cleanup via qemu_thread_atexit_add (see add_pollfd). */
static __thread Notifier pollfds_cleanup_notifier;
206
207static void pollfds_cleanup(Notifier *n, void *unused)
208{
209 g_assert(npfd == 0);
210 g_free(pollfds);
211 g_free(nodes);
212 nalloc = 0;
213}
214
215static void add_pollfd(AioHandler *node)
216{
217 if (npfd == nalloc) {
218 if (nalloc == 0) {
219 pollfds_cleanup_notifier.notify = pollfds_cleanup;
220 qemu_thread_atexit_add(&pollfds_cleanup_notifier);
221 nalloc = 8;
222 } else {
223 g_assert(nalloc <= INT_MAX);
224 nalloc *= 2;
225 }
226 pollfds = g_renew(GPollFD, pollfds, nalloc);
227 nodes = g_renew(AioHandler *, nodes, nalloc);
228 }
229 nodes[npfd] = node;
230 pollfds[npfd] = (GPollFD) {
231 .fd = node->pfd.fd,
232 .events = node->pfd.events,
233 };
234 npfd++;
235}
236
/* Poll the registered file descriptors once and dispatch the resulting
 * fd callbacks, bottom halves and timers.  With @blocking true, sleep up
 * to the deadline computed by aio_compute_timeout(); otherwise return
 * immediately.  Returns true if any callback made progress.
 */
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    int i, ret;
    bool progress;
    int64_t timeout;

    aio_context_acquire(ctx);
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    /* Keep aio_set_fd_handler from unlinking nodes while we walk. */
    ctx->walking_handlers++;

    /* The thread-local poll arrays must be idle between calls. */
    assert(npfd == 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->pfd.events
            && aio_node_check(ctx, node->is_external)) {
            add_pollfd(node);
        }
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;

    /* wait until next event */
    if (timeout) {
        /* Drop the context lock so other threads can use the context
         * while this one sleeps in poll().
         */
        aio_context_release(ctx);
    }
    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
    if (blocking) {
        /* poll() has returned; re-enable the aio_notify optimization. */
        atomic_sub(&ctx->notify_me, 2);
    }
    if (timeout) {
        aio_context_acquire(ctx);
    }

    aio_notify_accept(ctx);

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        /* Publish the results back into each AioHandler so that
         * aio_dispatch()/aio_pending() can see them.
         */
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;
    ctx->walking_handlers--;

    /* Run dispatch even if there were no readable fds to run timers */
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    aio_context_release(ctx);

    return progress;
}
Fam Zheng37fcee52015-10-30 12:06:28 +0800305
/* Platform hook for one-time AioContext initialisation; nothing to do
 * for this poll()-based implementation, so @errp is never set.
 */
void aio_context_setup(AioContext *ctx, Error **errp)
{
}