blob: 1ad459d7ec0919ba718544db725cb9b02afe0cf3 [file] [log] [blame]
/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
17
Peter Maydelld38ea872016-01-29 17:50:05 +000018#include "qemu/osdep.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020019#include "qemu-common.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010020#include "block/block.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010021#include "qemu/queue.h"
22#include "qemu/sockets.h"
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000023#include "qapi/error.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020024
25struct AioHandler {
26 EventNotifier *e;
Paolo Bonzinib4933172014-07-09 11:53:10 +020027 IOHandler *io_read;
28 IOHandler *io_write;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020029 EventNotifierHandler *io_notify;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020030 GPollFD pfd;
31 int deleted;
Paolo Bonzinib4933172014-07-09 11:53:10 +020032 void *opaque;
Fam Zhengdca21ef2015-10-23 11:08:05 +080033 bool is_external;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020034 QLIST_ENTRY(AioHandler) node;
35};
36
Paolo Bonzinib4933172014-07-09 11:53:10 +020037void aio_set_fd_handler(AioContext *ctx,
38 int fd,
Fam Zhengdca21ef2015-10-23 11:08:05 +080039 bool is_external,
Paolo Bonzinib4933172014-07-09 11:53:10 +020040 IOHandler *io_read,
41 IOHandler *io_write,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000042 AioPollFn *io_poll,
Paolo Bonzinib4933172014-07-09 11:53:10 +020043 void *opaque)
44{
45 /* fd is a SOCKET in our case */
46 AioHandler *node;
47
48 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
49 if (node->pfd.fd == fd && !node->deleted) {
50 break;
51 }
52 }
53
54 /* Are we deleting the fd handler? */
55 if (!io_read && !io_write) {
56 if (node) {
57 /* If the lock is held, just mark the node as deleted */
58 if (ctx->walking_handlers) {
59 node->deleted = 1;
60 node->pfd.revents = 0;
61 } else {
62 /* Otherwise, delete it for real. We can't just mark it as
63 * deleted because deleted nodes are only cleaned up after
64 * releasing the walking_handlers lock.
65 */
66 QLIST_REMOVE(node, node);
67 g_free(node);
68 }
69 }
70 } else {
71 HANDLE event;
72
73 if (node == NULL) {
74 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +010075 node = g_new0(AioHandler, 1);
Paolo Bonzinib4933172014-07-09 11:53:10 +020076 node->pfd.fd = fd;
77 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
78 }
79
80 node->pfd.events = 0;
81 if (node->io_read) {
82 node->pfd.events |= G_IO_IN;
83 }
84 if (node->io_write) {
85 node->pfd.events |= G_IO_OUT;
86 }
87
88 node->e = &ctx->notifier;
89
90 /* Update handler with latest information */
91 node->opaque = opaque;
92 node->io_read = io_read;
93 node->io_write = io_write;
Fam Zhengdca21ef2015-10-23 11:08:05 +080094 node->is_external = is_external;
Paolo Bonzinib4933172014-07-09 11:53:10 +020095
96 event = event_notifier_get_handle(&ctx->notifier);
97 WSAEventSelect(node->pfd.fd, event,
98 FD_READ | FD_ACCEPT | FD_CLOSE |
99 FD_CONNECT | FD_WRITE | FD_OOB);
100 }
101
102 aio_notify(ctx);
103}
104
Stefan Hajnoczi684e5082016-12-01 19:26:49 +0000105void aio_set_fd_poll(AioContext *ctx, int fd,
106 IOHandler *io_poll_begin,
107 IOHandler *io_poll_end)
108{
109 /* Not implemented */
110}
111
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200112void aio_set_event_notifier(AioContext *ctx,
113 EventNotifier *e,
Fam Zhengdca21ef2015-10-23 11:08:05 +0800114 bool is_external,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000115 EventNotifierHandler *io_notify,
116 AioPollFn *io_poll)
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200117{
118 AioHandler *node;
119
120 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
121 if (node->e == e && !node->deleted) {
122 break;
123 }
124 }
125
126 /* Are we deleting the fd handler? */
127 if (!io_notify) {
128 if (node) {
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200129 g_source_remove_poll(&ctx->source, &node->pfd);
130
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200131 /* If the lock is held, just mark the node as deleted */
132 if (ctx->walking_handlers) {
133 node->deleted = 1;
134 node->pfd.revents = 0;
135 } else {
136 /* Otherwise, delete it for real. We can't just mark it as
137 * deleted because deleted nodes are only cleaned up after
138 * releasing the walking_handlers lock.
139 */
140 QLIST_REMOVE(node, node);
141 g_free(node);
142 }
143 }
144 } else {
145 if (node == NULL) {
146 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +0100147 node = g_new0(AioHandler, 1);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200148 node->e = e;
149 node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
150 node->pfd.events = G_IO_IN;
Fam Zhengdca21ef2015-10-23 11:08:05 +0800151 node->is_external = is_external;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200152 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200153
154 g_source_add_poll(&ctx->source, &node->pfd);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200155 }
156 /* Update handler with latest information */
157 node->io_notify = io_notify;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200158 }
Paolo Bonzini7ed2b242012-09-25 10:22:39 +0200159
160 aio_notify(ctx);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200161}
162
Stefan Hajnoczi684e5082016-12-01 19:26:49 +0000163void aio_set_event_notifier_poll(AioContext *ctx,
164 EventNotifier *notifier,
165 EventNotifierHandler *io_poll_begin,
166 EventNotifierHandler *io_poll_end)
167{
168 /* Not implemented */
169}
170
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200171bool aio_prepare(AioContext *ctx)
172{
Paolo Bonzinib4933172014-07-09 11:53:10 +0200173 static struct timeval tv0;
174 AioHandler *node;
175 bool have_select_revents = false;
176 fd_set rfds, wfds;
177
178 /* fill fd sets */
179 FD_ZERO(&rfds);
180 FD_ZERO(&wfds);
181 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
182 if (node->io_read) {
183 FD_SET ((SOCKET)node->pfd.fd, &rfds);
184 }
185 if (node->io_write) {
186 FD_SET ((SOCKET)node->pfd.fd, &wfds);
187 }
188 }
189
190 if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
191 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
192 node->pfd.revents = 0;
193 if (FD_ISSET(node->pfd.fd, &rfds)) {
194 node->pfd.revents |= G_IO_IN;
195 have_select_revents = true;
196 }
197
198 if (FD_ISSET(node->pfd.fd, &wfds)) {
199 node->pfd.revents |= G_IO_OUT;
200 have_select_revents = true;
201 }
202 }
203 }
204
205 return have_select_revents;
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200206}
207
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200208bool aio_pending(AioContext *ctx)
209{
210 AioHandler *node;
211
212 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
213 if (node->pfd.revents && node->io_notify) {
214 return true;
215 }
Paolo Bonzinib4933172014-07-09 11:53:10 +0200216
217 if ((node->pfd.revents & G_IO_IN) && node->io_read) {
218 return true;
219 }
220 if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
221 return true;
222 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200223 }
224
225 return false;
226}
227
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200228static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200229{
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100230 AioHandler *node, *tmp;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200231 bool progress = false;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200232
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100233 ctx->walking_handlers++;
234
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200235 /*
Paolo Bonzini87f68d32014-07-07 15:18:02 +0200236 * We have to walk very carefully in case aio_set_fd_handler is
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200237 * called while we're walking.
238 */
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100239 QLIST_FOREACH_SAFE(node, &ctx->aio_handlers, node, tmp) {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200240 int revents = node->pfd.revents;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200241
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200242 if (!node->deleted &&
Paolo Bonzinib4933172014-07-09 11:53:10 +0200243 (revents || event_notifier_get_handle(node->e) == event) &&
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200244 node->io_notify) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200245 node->pfd.revents = 0;
246 node->io_notify(node->e);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200247
248 /* aio_notify() does not count as progress */
Stefan Hajnoczi8b2d42d2013-08-22 15:28:35 +0200249 if (node->e != &ctx->notifier) {
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200250 progress = true;
251 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200252 }
253
Paolo Bonzinib4933172014-07-09 11:53:10 +0200254 if (!node->deleted &&
255 (node->io_read || node->io_write)) {
256 node->pfd.revents = 0;
257 if ((revents & G_IO_IN) && node->io_read) {
258 node->io_read(node->opaque);
259 progress = true;
260 }
261 if ((revents & G_IO_OUT) && node->io_write) {
262 node->io_write(node->opaque);
263 progress = true;
264 }
265
266 /* if the next select() will return an event, we have progressed */
267 if (event == event_notifier_get_handle(&ctx->notifier)) {
268 WSANETWORKEVENTS ev;
269 WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
270 if (ev.lNetworkEvents) {
271 progress = true;
272 }
273 }
274 }
275
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100276 if (node->deleted) {
277 ctx->walking_handlers--;
278 if (!ctx->walking_handlers) {
279 QLIST_REMOVE(node, node);
280 g_free(node);
281 }
282 ctx->walking_handlers++;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200283 }
284 }
285
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100286 ctx->walking_handlers--;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200287 return progress;
288}
289
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000290bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200291{
292 bool progress;
293
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200294 progress = aio_bh_poll(ctx);
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000295 if (dispatch_fds) {
296 progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
297 }
Paolo Bonzinid397ec992014-07-09 11:53:02 +0200298 progress |= timerlistgroup_run_timers(&ctx->tlg);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200299 return progress;
300}
301
302bool aio_poll(AioContext *ctx, bool blocking)
303{
304 AioHandler *node;
305 HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200306 bool progress, have_select_revents, first;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200307 int count;
308 int timeout;
309
Paolo Bonzini49110172015-02-20 17:26:51 +0100310 aio_context_acquire(ctx);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200311 progress = false;
312
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200313 /* aio_notify can avoid the expensive event_notifier_set if
314 * everything (file descriptors, bottom halves, timers) will
315 * be re-evaluated before the next blocking poll(). This is
316 * already true when aio_poll is called with blocking == false;
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200317 * if blocking == true, it is only true after poll() returns,
318 * so disable the optimization now.
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200319 */
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200320 if (blocking) {
321 atomic_add(&ctx->notify_me, 2);
322 }
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200323
Paolo Bonzini6493c972015-07-21 16:07:50 +0200324 have_select_revents = aio_prepare(ctx);
325
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200326 ctx->walking_handlers++;
327
328 /* fill fd sets */
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200329 count = 0;
330 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
Fam Zhengc1e1e5f2015-10-23 11:08:08 +0800331 if (!node->deleted && node->io_notify
332 && aio_node_check(ctx, node->is_external)) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200333 events[count++] = event_notifier_get_handle(node->e);
334 }
335 }
336
337 ctx->walking_handlers--;
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200338 first = true;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200339
Paolo Bonzini6493c972015-07-21 16:07:50 +0200340 /* ctx->notifier is always registered. */
341 assert(count > 0);
342
343 /* Multiple iterations, all of them non-blocking except the first,
344 * may be necessary to process all pending events. After the first
345 * WaitForMultipleObjects call ctx->notify_me will be decremented.
346 */
347 do {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200348 HANDLE event;
Alex Bligh438e1f42013-08-21 16:02:53 +0100349 int ret;
350
Paolo Bonzini6493c972015-07-21 16:07:50 +0200351 timeout = blocking && !have_select_revents
Paolo Bonzini845ca102014-07-09 11:53:01 +0200352 ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
Paolo Bonzini49110172015-02-20 17:26:51 +0100353 if (timeout) {
354 aio_context_release(ctx);
355 }
Alex Bligh438e1f42013-08-21 16:02:53 +0100356 ret = WaitForMultipleObjects(count, events, FALSE, timeout);
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200357 if (blocking) {
358 assert(first);
359 atomic_sub(&ctx->notify_me, 2);
360 }
Paolo Bonzini49110172015-02-20 17:26:51 +0100361 if (timeout) {
362 aio_context_acquire(ctx);
363 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200364
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200365 if (first) {
Paolo Bonzini05e514b2015-07-21 16:07:53 +0200366 aio_notify_accept(ctx);
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200367 progress |= aio_bh_poll(ctx);
368 first = false;
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200369 }
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200370
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200371 /* if we have any signaled events, dispatch event */
Paolo Bonzinib4933172014-07-09 11:53:10 +0200372 event = NULL;
373 if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
374 event = events[ret - WAIT_OBJECT_0];
Paolo Bonzinia90d4112014-09-15 14:52:58 +0200375 events[ret - WAIT_OBJECT_0] = events[--count];
Paolo Bonzinib4933172014-07-09 11:53:10 +0200376 } else if (!have_select_revents) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200377 break;
378 }
379
Paolo Bonzinib4933172014-07-09 11:53:10 +0200380 have_select_revents = false;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200381 blocking = false;
382
Paolo Bonzinib4933172014-07-09 11:53:10 +0200383 progress |= aio_dispatch_handlers(ctx, event);
Paolo Bonzini6493c972015-07-21 16:07:50 +0200384 } while (count > 0);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200385
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200386 progress |= timerlistgroup_run_timers(&ctx->tlg);
Alex Bligh438e1f42013-08-21 16:02:53 +0100387
Paolo Bonzini49110172015-02-20 17:26:51 +0100388 aio_context_release(ctx);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200389 return progress;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200390}
Fam Zheng37fcee52015-10-30 12:06:28 +0800391
Cao jin7e003462016-07-15 18:28:44 +0800392void aio_context_setup(AioContext *ctx)
Fam Zheng37fcee52015-10-30 12:06:28 +0800393{
394}
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000395
Stefan Hajnoczi82a41182016-12-01 19:26:51 +0000396void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
397 int64_t grow, int64_t shrink, Error **errp)
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000398{
399 error_setg(errp, "AioContext polling is not implemented on Windows");
400}