blob: 0a6e91b0c37799d4b0f272aad8f59d31cfdd24e4 [file] [log] [blame]
Paolo Bonzinif42b2202012-06-09 04:01:51 +02001/*
2 * QEMU aio implementation
3 *
4 * Copyright IBM Corp., 2008
5 * Copyright Red Hat Inc., 2012
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Paolo Bonzini <pbonzini@redhat.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2. See
12 * the COPYING file in the top-level directory.
13 *
14 * Contributions after 2012-01-13 are licensed under the terms of the
15 * GNU GPL, version 2 or (at your option) any later version.
16 */
17
Peter Maydelld38ea872016-01-29 17:50:05 +000018#include "qemu/osdep.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020019#include "qemu-common.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010020#include "block/block.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010021#include "qemu/queue.h"
22#include "qemu/sockets.h"
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000023#include "qapi/error.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020024
/*
 * One entry in an AioContext's aio_handlers list.  Entries are created by
 * aio_set_fd_handler() (socket handlers, using io_read/io_write/opaque) or
 * by aio_set_event_notifier() (event handlers, using io_notify).
 */
struct AioHandler {
    /* Notifier passed to io_notify; socket entries point at ctx->notifier */
    EventNotifier *e;
    /* Called with @opaque when G_IO_IN is reported for the socket */
    IOHandler *io_read;
    /* Called with @opaque when G_IO_OUT is reported for the socket */
    IOHandler *io_write;
    /* Called with @e when the notifier's event is signaled */
    EventNotifierHandler *io_notify;
    /* fd holds the socket or the notifier's HANDLE; revents holds pending events */
    GPollFD pfd;
    /*
     * Set instead of freeing the entry while ctx->walking_handlers is
     * non-zero; aio_dispatch_handlers() unlinks and frees such entries.
     */
    int deleted;
    /* Argument handed to io_read/io_write */
    void *opaque;
    /* Passed to aio_node_check() when aio_poll() selects events to wait on */
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};
36
Paolo Bonzinib4933172014-07-09 11:53:10 +020037void aio_set_fd_handler(AioContext *ctx,
38 int fd,
Fam Zhengdca21ef2015-10-23 11:08:05 +080039 bool is_external,
Paolo Bonzinib4933172014-07-09 11:53:10 +020040 IOHandler *io_read,
41 IOHandler *io_write,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000042 AioPollFn *io_poll,
Paolo Bonzinib4933172014-07-09 11:53:10 +020043 void *opaque)
44{
45 /* fd is a SOCKET in our case */
46 AioHandler *node;
47
48 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
49 if (node->pfd.fd == fd && !node->deleted) {
50 break;
51 }
52 }
53
54 /* Are we deleting the fd handler? */
55 if (!io_read && !io_write) {
56 if (node) {
57 /* If the lock is held, just mark the node as deleted */
58 if (ctx->walking_handlers) {
59 node->deleted = 1;
60 node->pfd.revents = 0;
61 } else {
62 /* Otherwise, delete it for real. We can't just mark it as
63 * deleted because deleted nodes are only cleaned up after
64 * releasing the walking_handlers lock.
65 */
66 QLIST_REMOVE(node, node);
67 g_free(node);
68 }
69 }
70 } else {
71 HANDLE event;
72
73 if (node == NULL) {
74 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +010075 node = g_new0(AioHandler, 1);
Paolo Bonzinib4933172014-07-09 11:53:10 +020076 node->pfd.fd = fd;
77 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
78 }
79
80 node->pfd.events = 0;
81 if (node->io_read) {
82 node->pfd.events |= G_IO_IN;
83 }
84 if (node->io_write) {
85 node->pfd.events |= G_IO_OUT;
86 }
87
88 node->e = &ctx->notifier;
89
90 /* Update handler with latest information */
91 node->opaque = opaque;
92 node->io_read = io_read;
93 node->io_write = io_write;
Fam Zhengdca21ef2015-10-23 11:08:05 +080094 node->is_external = is_external;
Paolo Bonzinib4933172014-07-09 11:53:10 +020095
96 event = event_notifier_get_handle(&ctx->notifier);
97 WSAEventSelect(node->pfd.fd, event,
98 FD_READ | FD_ACCEPT | FD_CLOSE |
99 FD_CONNECT | FD_WRITE | FD_OOB);
100 }
101
102 aio_notify(ctx);
103}
104
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200105void aio_set_event_notifier(AioContext *ctx,
106 EventNotifier *e,
Fam Zhengdca21ef2015-10-23 11:08:05 +0800107 bool is_external,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000108 EventNotifierHandler *io_notify,
109 AioPollFn *io_poll)
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200110{
111 AioHandler *node;
112
113 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
114 if (node->e == e && !node->deleted) {
115 break;
116 }
117 }
118
119 /* Are we deleting the fd handler? */
120 if (!io_notify) {
121 if (node) {
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200122 g_source_remove_poll(&ctx->source, &node->pfd);
123
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200124 /* If the lock is held, just mark the node as deleted */
125 if (ctx->walking_handlers) {
126 node->deleted = 1;
127 node->pfd.revents = 0;
128 } else {
129 /* Otherwise, delete it for real. We can't just mark it as
130 * deleted because deleted nodes are only cleaned up after
131 * releasing the walking_handlers lock.
132 */
133 QLIST_REMOVE(node, node);
134 g_free(node);
135 }
136 }
137 } else {
138 if (node == NULL) {
139 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +0100140 node = g_new0(AioHandler, 1);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200141 node->e = e;
142 node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
143 node->pfd.events = G_IO_IN;
Fam Zhengdca21ef2015-10-23 11:08:05 +0800144 node->is_external = is_external;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200145 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200146
147 g_source_add_poll(&ctx->source, &node->pfd);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200148 }
149 /* Update handler with latest information */
150 node->io_notify = io_notify;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200151 }
Paolo Bonzini7ed2b242012-09-25 10:22:39 +0200152
153 aio_notify(ctx);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200154}
155
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200156bool aio_prepare(AioContext *ctx)
157{
Paolo Bonzinib4933172014-07-09 11:53:10 +0200158 static struct timeval tv0;
159 AioHandler *node;
160 bool have_select_revents = false;
161 fd_set rfds, wfds;
162
163 /* fill fd sets */
164 FD_ZERO(&rfds);
165 FD_ZERO(&wfds);
166 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
167 if (node->io_read) {
168 FD_SET ((SOCKET)node->pfd.fd, &rfds);
169 }
170 if (node->io_write) {
171 FD_SET ((SOCKET)node->pfd.fd, &wfds);
172 }
173 }
174
175 if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
176 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
177 node->pfd.revents = 0;
178 if (FD_ISSET(node->pfd.fd, &rfds)) {
179 node->pfd.revents |= G_IO_IN;
180 have_select_revents = true;
181 }
182
183 if (FD_ISSET(node->pfd.fd, &wfds)) {
184 node->pfd.revents |= G_IO_OUT;
185 have_select_revents = true;
186 }
187 }
188 }
189
190 return have_select_revents;
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200191}
192
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200193bool aio_pending(AioContext *ctx)
194{
195 AioHandler *node;
196
197 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
198 if (node->pfd.revents && node->io_notify) {
199 return true;
200 }
Paolo Bonzinib4933172014-07-09 11:53:10 +0200201
202 if ((node->pfd.revents & G_IO_IN) && node->io_read) {
203 return true;
204 }
205 if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
206 return true;
207 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200208 }
209
210 return false;
211}
212
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200213static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200214{
215 AioHandler *node;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200216 bool progress = false;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200217
218 /*
Paolo Bonzini87f68d32014-07-07 15:18:02 +0200219 * We have to walk very carefully in case aio_set_fd_handler is
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200220 * called while we're walking.
221 */
222 node = QLIST_FIRST(&ctx->aio_handlers);
223 while (node) {
224 AioHandler *tmp;
Paolo Bonzinib4933172014-07-09 11:53:10 +0200225 int revents = node->pfd.revents;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200226
227 ctx->walking_handlers++;
228
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200229 if (!node->deleted &&
Paolo Bonzinib4933172014-07-09 11:53:10 +0200230 (revents || event_notifier_get_handle(node->e) == event) &&
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200231 node->io_notify) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200232 node->pfd.revents = 0;
233 node->io_notify(node->e);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200234
235 /* aio_notify() does not count as progress */
Stefan Hajnoczi8b2d42d2013-08-22 15:28:35 +0200236 if (node->e != &ctx->notifier) {
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200237 progress = true;
238 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200239 }
240
Paolo Bonzinib4933172014-07-09 11:53:10 +0200241 if (!node->deleted &&
242 (node->io_read || node->io_write)) {
243 node->pfd.revents = 0;
244 if ((revents & G_IO_IN) && node->io_read) {
245 node->io_read(node->opaque);
246 progress = true;
247 }
248 if ((revents & G_IO_OUT) && node->io_write) {
249 node->io_write(node->opaque);
250 progress = true;
251 }
252
253 /* if the next select() will return an event, we have progressed */
254 if (event == event_notifier_get_handle(&ctx->notifier)) {
255 WSANETWORKEVENTS ev;
256 WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
257 if (ev.lNetworkEvents) {
258 progress = true;
259 }
260 }
261 }
262
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200263 tmp = node;
264 node = QLIST_NEXT(node, node);
265
266 ctx->walking_handlers--;
267
268 if (!ctx->walking_handlers && tmp->deleted) {
269 QLIST_REMOVE(tmp, node);
270 g_free(tmp);
271 }
272 }
273
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200274 return progress;
275}
276
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000277bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200278{
279 bool progress;
280
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200281 progress = aio_bh_poll(ctx);
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000282 if (dispatch_fds) {
283 progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
284 }
Paolo Bonzinid397ec992014-07-09 11:53:02 +0200285 progress |= timerlistgroup_run_timers(&ctx->tlg);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200286 return progress;
287}
288
289bool aio_poll(AioContext *ctx, bool blocking)
290{
291 AioHandler *node;
292 HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200293 bool progress, have_select_revents, first;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200294 int count;
295 int timeout;
296
Paolo Bonzini49110172015-02-20 17:26:51 +0100297 aio_context_acquire(ctx);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200298 progress = false;
299
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200300 /* aio_notify can avoid the expensive event_notifier_set if
301 * everything (file descriptors, bottom halves, timers) will
302 * be re-evaluated before the next blocking poll(). This is
303 * already true when aio_poll is called with blocking == false;
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200304 * if blocking == true, it is only true after poll() returns,
305 * so disable the optimization now.
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200306 */
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200307 if (blocking) {
308 atomic_add(&ctx->notify_me, 2);
309 }
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200310
Paolo Bonzini6493c972015-07-21 16:07:50 +0200311 have_select_revents = aio_prepare(ctx);
312
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200313 ctx->walking_handlers++;
314
315 /* fill fd sets */
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200316 count = 0;
317 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
Fam Zhengc1e1e5f2015-10-23 11:08:08 +0800318 if (!node->deleted && node->io_notify
319 && aio_node_check(ctx, node->is_external)) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200320 events[count++] = event_notifier_get_handle(node->e);
321 }
322 }
323
324 ctx->walking_handlers--;
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200325 first = true;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200326
Paolo Bonzini6493c972015-07-21 16:07:50 +0200327 /* ctx->notifier is always registered. */
328 assert(count > 0);
329
330 /* Multiple iterations, all of them non-blocking except the first,
331 * may be necessary to process all pending events. After the first
332 * WaitForMultipleObjects call ctx->notify_me will be decremented.
333 */
334 do {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200335 HANDLE event;
Alex Bligh438e1f42013-08-21 16:02:53 +0100336 int ret;
337
Paolo Bonzini6493c972015-07-21 16:07:50 +0200338 timeout = blocking && !have_select_revents
Paolo Bonzini845ca102014-07-09 11:53:01 +0200339 ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
Paolo Bonzini49110172015-02-20 17:26:51 +0100340 if (timeout) {
341 aio_context_release(ctx);
342 }
Alex Bligh438e1f42013-08-21 16:02:53 +0100343 ret = WaitForMultipleObjects(count, events, FALSE, timeout);
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200344 if (blocking) {
345 assert(first);
346 atomic_sub(&ctx->notify_me, 2);
347 }
Paolo Bonzini49110172015-02-20 17:26:51 +0100348 if (timeout) {
349 aio_context_acquire(ctx);
350 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200351
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200352 if (first) {
Paolo Bonzini05e514b2015-07-21 16:07:53 +0200353 aio_notify_accept(ctx);
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200354 progress |= aio_bh_poll(ctx);
355 first = false;
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200356 }
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200357
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200358 /* if we have any signaled events, dispatch event */
Paolo Bonzinib4933172014-07-09 11:53:10 +0200359 event = NULL;
360 if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
361 event = events[ret - WAIT_OBJECT_0];
Paolo Bonzinia90d4112014-09-15 14:52:58 +0200362 events[ret - WAIT_OBJECT_0] = events[--count];
Paolo Bonzinib4933172014-07-09 11:53:10 +0200363 } else if (!have_select_revents) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200364 break;
365 }
366
Paolo Bonzinib4933172014-07-09 11:53:10 +0200367 have_select_revents = false;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200368 blocking = false;
369
Paolo Bonzinib4933172014-07-09 11:53:10 +0200370 progress |= aio_dispatch_handlers(ctx, event);
Paolo Bonzini6493c972015-07-21 16:07:50 +0200371 } while (count > 0);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200372
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200373 progress |= timerlistgroup_run_timers(&ctx->tlg);
Alex Bligh438e1f42013-08-21 16:02:53 +0100374
Paolo Bonzini49110172015-02-20 17:26:51 +0100375 aio_context_release(ctx);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200376 return progress;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200377}
Fam Zheng37fcee52015-10-30 12:06:28 +0800378
/* Per-context setup hook; this implementation has nothing to set up. */
void aio_context_setup(AioContext *ctx)
{
}
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000382
/*
 * Polling parameters are not supported by this implementation: @ctx and
 * @max_ns are ignored and an error is always reported through @errp.
 */
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp)
{
    error_setg(errp, "AioContext polling is not implemented on Windows");
}