blob: 900524c9c24ed9009711bbee742746686b233aa4 [file] [log] [blame]
/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
17
Peter Maydelld38ea872016-01-29 17:50:05 +000018#include "qemu/osdep.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020019#include "qemu-common.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010020#include "block/block.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010021#include "qemu/queue.h"
22#include "qemu/sockets.h"
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000023#include "qapi/error.h"
Paolo Bonzinib92d9a92017-01-12 19:07:58 +010024#include "qemu/rcu_queue.h"
Paolo Bonzinif42b2202012-06-09 04:01:51 +020025
26struct AioHandler {
27 EventNotifier *e;
Paolo Bonzinib4933172014-07-09 11:53:10 +020028 IOHandler *io_read;
29 IOHandler *io_write;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020030 EventNotifierHandler *io_notify;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020031 GPollFD pfd;
32 int deleted;
Paolo Bonzinib4933172014-07-09 11:53:10 +020033 void *opaque;
Fam Zhengdca21ef2015-10-23 11:08:05 +080034 bool is_external;
Paolo Bonzinif42b2202012-06-09 04:01:51 +020035 QLIST_ENTRY(AioHandler) node;
36};
37
Paolo Bonzinib4933172014-07-09 11:53:10 +020038void aio_set_fd_handler(AioContext *ctx,
39 int fd,
Fam Zhengdca21ef2015-10-23 11:08:05 +080040 bool is_external,
Paolo Bonzinib4933172014-07-09 11:53:10 +020041 IOHandler *io_read,
42 IOHandler *io_write,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +000043 AioPollFn *io_poll,
Paolo Bonzinib4933172014-07-09 11:53:10 +020044 void *opaque)
45{
46 /* fd is a SOCKET in our case */
47 AioHandler *node;
48
Paolo Bonzinib92d9a92017-01-12 19:07:58 +010049 qemu_lockcnt_lock(&ctx->list_lock);
Paolo Bonzinib4933172014-07-09 11:53:10 +020050 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
51 if (node->pfd.fd == fd && !node->deleted) {
52 break;
53 }
54 }
55
56 /* Are we deleting the fd handler? */
57 if (!io_read && !io_write) {
58 if (node) {
Paolo Bonzinib92d9a92017-01-12 19:07:58 +010059 /* If aio_poll is in progress, just mark the node as deleted */
60 if (qemu_lockcnt_count(&ctx->list_lock)) {
Paolo Bonzinib4933172014-07-09 11:53:10 +020061 node->deleted = 1;
62 node->pfd.revents = 0;
63 } else {
64 /* Otherwise, delete it for real. We can't just mark it as
65 * deleted because deleted nodes are only cleaned up after
Paolo Bonzinib92d9a92017-01-12 19:07:58 +010066 * releasing the list_lock.
Paolo Bonzinib4933172014-07-09 11:53:10 +020067 */
68 QLIST_REMOVE(node, node);
69 g_free(node);
70 }
71 }
72 } else {
73 HANDLE event;
74
75 if (node == NULL) {
76 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +010077 node = g_new0(AioHandler, 1);
Paolo Bonzinib4933172014-07-09 11:53:10 +020078 node->pfd.fd = fd;
Paolo Bonzinib92d9a92017-01-12 19:07:58 +010079 QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
Paolo Bonzinib4933172014-07-09 11:53:10 +020080 }
81
82 node->pfd.events = 0;
83 if (node->io_read) {
84 node->pfd.events |= G_IO_IN;
85 }
86 if (node->io_write) {
87 node->pfd.events |= G_IO_OUT;
88 }
89
90 node->e = &ctx->notifier;
91
92 /* Update handler with latest information */
93 node->opaque = opaque;
94 node->io_read = io_read;
95 node->io_write = io_write;
Fam Zhengdca21ef2015-10-23 11:08:05 +080096 node->is_external = is_external;
Paolo Bonzinib4933172014-07-09 11:53:10 +020097
98 event = event_notifier_get_handle(&ctx->notifier);
99 WSAEventSelect(node->pfd.fd, event,
100 FD_READ | FD_ACCEPT | FD_CLOSE |
101 FD_CONNECT | FD_WRITE | FD_OOB);
102 }
103
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100104 qemu_lockcnt_unlock(&ctx->list_lock);
Paolo Bonzinib4933172014-07-09 11:53:10 +0200105 aio_notify(ctx);
106}
107
Stefan Hajnoczi684e5082016-12-01 19:26:49 +0000108void aio_set_fd_poll(AioContext *ctx, int fd,
109 IOHandler *io_poll_begin,
110 IOHandler *io_poll_end)
111{
112 /* Not implemented */
113}
114
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200115void aio_set_event_notifier(AioContext *ctx,
116 EventNotifier *e,
Fam Zhengdca21ef2015-10-23 11:08:05 +0800117 bool is_external,
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000118 EventNotifierHandler *io_notify,
119 AioPollFn *io_poll)
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200120{
121 AioHandler *node;
122
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100123 qemu_lockcnt_lock(&ctx->list_lock);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200124 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
125 if (node->e == e && !node->deleted) {
126 break;
127 }
128 }
129
130 /* Are we deleting the fd handler? */
131 if (!io_notify) {
132 if (node) {
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200133 g_source_remove_poll(&ctx->source, &node->pfd);
134
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100135 /* aio_poll is in progress, just mark the node as deleted */
136 if (qemu_lockcnt_count(&ctx->list_lock)) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200137 node->deleted = 1;
138 node->pfd.revents = 0;
139 } else {
140 /* Otherwise, delete it for real. We can't just mark it as
141 * deleted because deleted nodes are only cleaned up after
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100142 * releasing the list_lock.
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200143 */
144 QLIST_REMOVE(node, node);
145 g_free(node);
146 }
147 }
148 } else {
149 if (node == NULL) {
150 /* Alloc and insert if it's not already there */
Markus Armbruster3ba235a2014-12-04 13:55:09 +0100151 node = g_new0(AioHandler, 1);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200152 node->e = e;
153 node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
154 node->pfd.events = G_IO_IN;
Fam Zhengdca21ef2015-10-23 11:08:05 +0800155 node->is_external = is_external;
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100156 QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
Paolo Bonzinie3713e02012-09-24 14:57:41 +0200157
158 g_source_add_poll(&ctx->source, &node->pfd);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200159 }
160 /* Update handler with latest information */
161 node->io_notify = io_notify;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200162 }
Paolo Bonzini7ed2b242012-09-25 10:22:39 +0200163
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100164 qemu_lockcnt_unlock(&ctx->list_lock);
Paolo Bonzini7ed2b242012-09-25 10:22:39 +0200165 aio_notify(ctx);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200166}
167
Stefan Hajnoczi684e5082016-12-01 19:26:49 +0000168void aio_set_event_notifier_poll(AioContext *ctx,
169 EventNotifier *notifier,
170 EventNotifierHandler *io_poll_begin,
171 EventNotifierHandler *io_poll_end)
172{
173 /* Not implemented */
174}
175
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200176bool aio_prepare(AioContext *ctx)
177{
Paolo Bonzinib4933172014-07-09 11:53:10 +0200178 static struct timeval tv0;
179 AioHandler *node;
180 bool have_select_revents = false;
181 fd_set rfds, wfds;
182
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100183 /*
184 * We have to walk very carefully in case aio_set_fd_handler is
185 * called while we're walking.
186 */
187 qemu_lockcnt_inc(&ctx->list_lock);
188
Paolo Bonzinib4933172014-07-09 11:53:10 +0200189 /* fill fd sets */
190 FD_ZERO(&rfds);
191 FD_ZERO(&wfds);
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100192 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200193 if (node->io_read) {
194 FD_SET ((SOCKET)node->pfd.fd, &rfds);
195 }
196 if (node->io_write) {
197 FD_SET ((SOCKET)node->pfd.fd, &wfds);
198 }
199 }
200
201 if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100202 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200203 node->pfd.revents = 0;
204 if (FD_ISSET(node->pfd.fd, &rfds)) {
205 node->pfd.revents |= G_IO_IN;
206 have_select_revents = true;
207 }
208
209 if (FD_ISSET(node->pfd.fd, &wfds)) {
210 node->pfd.revents |= G_IO_OUT;
211 have_select_revents = true;
212 }
213 }
214 }
215
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100216 qemu_lockcnt_dec(&ctx->list_lock);
Paolo Bonzinib4933172014-07-09 11:53:10 +0200217 return have_select_revents;
Paolo Bonzinia3462c62014-07-09 11:53:08 +0200218}
219
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200220bool aio_pending(AioContext *ctx)
221{
222 AioHandler *node;
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100223 bool result = false;
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100224
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200225 /*
Paolo Bonzini87f68d32014-07-07 15:18:02 +0200226 * We have to walk very carefully in case aio_set_fd_handler is
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200227 * called while we're walking.
228 */
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100229 qemu_lockcnt_inc(&ctx->list_lock);
230 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
231 if (node->pfd.revents && node->io_notify) {
232 result = true;
233 break;
234 }
235
236 if ((node->pfd.revents & G_IO_IN) && node->io_read) {
237 result = true;
238 break;
239 }
240 if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
241 result = true;
242 break;
243 }
244 }
245
246 qemu_lockcnt_dec(&ctx->list_lock);
247 return result;
248}
249
250static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
251{
252 AioHandler *node;
253 bool progress = false;
254 AioHandler *tmp;
255
256 qemu_lockcnt_inc(&ctx->list_lock);
257
258 /*
259 * We have to walk very carefully in case aio_set_fd_handler is
260 * called while we're walking.
261 */
262 QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200263 int revents = node->pfd.revents;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200264
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200265 if (!node->deleted &&
Paolo Bonzinib4933172014-07-09 11:53:10 +0200266 (revents || event_notifier_get_handle(node->e) == event) &&
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200267 node->io_notify) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200268 node->pfd.revents = 0;
269 node->io_notify(node->e);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200270
271 /* aio_notify() does not count as progress */
Stefan Hajnoczi8b2d42d2013-08-22 15:28:35 +0200272 if (node->e != &ctx->notifier) {
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200273 progress = true;
274 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200275 }
276
Paolo Bonzinib4933172014-07-09 11:53:10 +0200277 if (!node->deleted &&
278 (node->io_read || node->io_write)) {
279 node->pfd.revents = 0;
280 if ((revents & G_IO_IN) && node->io_read) {
281 node->io_read(node->opaque);
282 progress = true;
283 }
284 if ((revents & G_IO_OUT) && node->io_write) {
285 node->io_write(node->opaque);
286 progress = true;
287 }
288
289 /* if the next select() will return an event, we have progressed */
290 if (event == event_notifier_get_handle(&ctx->notifier)) {
291 WSANETWORKEVENTS ev;
292 WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
293 if (ev.lNetworkEvents) {
294 progress = true;
295 }
296 }
297 }
298
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100299 if (node->deleted) {
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100300 if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100301 QLIST_REMOVE(node, node);
302 g_free(node);
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100303 qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
Paolo Bonziniabf90d32017-01-12 19:07:56 +0100304 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200305 }
306 }
307
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100308 qemu_lockcnt_dec(&ctx->list_lock);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200309 return progress;
310}
311
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000312bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200313{
314 bool progress;
315
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200316 progress = aio_bh_poll(ctx);
Stefan Hajnoczi721671a2016-12-01 19:26:40 +0000317 if (dispatch_fds) {
318 progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
319 }
Paolo Bonzinid397ec992014-07-09 11:53:02 +0200320 progress |= timerlistgroup_run_timers(&ctx->tlg);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200321 return progress;
322}
323
324bool aio_poll(AioContext *ctx, bool blocking)
325{
326 AioHandler *node;
327 HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200328 bool progress, have_select_revents, first;
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200329 int count;
330 int timeout;
331
Paolo Bonzini49110172015-02-20 17:26:51 +0100332 aio_context_acquire(ctx);
Paolo Bonzinia398dea2014-07-09 11:53:03 +0200333 progress = false;
334
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200335 /* aio_notify can avoid the expensive event_notifier_set if
336 * everything (file descriptors, bottom halves, timers) will
337 * be re-evaluated before the next blocking poll(). This is
338 * already true when aio_poll is called with blocking == false;
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200339 * if blocking == true, it is only true after poll() returns,
340 * so disable the optimization now.
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200341 */
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200342 if (blocking) {
343 atomic_add(&ctx->notify_me, 2);
344 }
Paolo Bonzini0a9dd162014-07-09 11:53:07 +0200345
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100346 qemu_lockcnt_inc(&ctx->list_lock);
Paolo Bonzini6493c972015-07-21 16:07:50 +0200347 have_select_revents = aio_prepare(ctx);
348
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200349 /* fill fd sets */
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200350 count = 0;
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100351 QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
Fam Zhengc1e1e5f2015-10-23 11:08:08 +0800352 if (!node->deleted && node->io_notify
353 && aio_node_check(ctx, node->is_external)) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200354 events[count++] = event_notifier_get_handle(node->e);
355 }
356 }
357
Paolo Bonzinib92d9a92017-01-12 19:07:58 +0100358 qemu_lockcnt_dec(&ctx->list_lock);
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200359 first = true;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200360
Paolo Bonzini6493c972015-07-21 16:07:50 +0200361 /* ctx->notifier is always registered. */
362 assert(count > 0);
363
364 /* Multiple iterations, all of them non-blocking except the first,
365 * may be necessary to process all pending events. After the first
366 * WaitForMultipleObjects call ctx->notify_me will be decremented.
367 */
368 do {
Paolo Bonzinib4933172014-07-09 11:53:10 +0200369 HANDLE event;
Alex Bligh438e1f42013-08-21 16:02:53 +0100370 int ret;
371
Paolo Bonzini6493c972015-07-21 16:07:50 +0200372 timeout = blocking && !have_select_revents
Paolo Bonzini845ca102014-07-09 11:53:01 +0200373 ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
Paolo Bonzini49110172015-02-20 17:26:51 +0100374 if (timeout) {
375 aio_context_release(ctx);
376 }
Alex Bligh438e1f42013-08-21 16:02:53 +0100377 ret = WaitForMultipleObjects(count, events, FALSE, timeout);
Paolo Bonzinieabc9772015-07-21 16:07:51 +0200378 if (blocking) {
379 assert(first);
380 atomic_sub(&ctx->notify_me, 2);
381 }
Paolo Bonzini49110172015-02-20 17:26:51 +0100382 if (timeout) {
383 aio_context_acquire(ctx);
384 }
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200385
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200386 if (first) {
Paolo Bonzini05e514b2015-07-21 16:07:53 +0200387 aio_notify_accept(ctx);
Paolo Bonzini21a03d12015-07-21 16:07:52 +0200388 progress |= aio_bh_poll(ctx);
389 first = false;
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200390 }
Paolo Bonzini3672fa52014-07-09 11:53:04 +0200391
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200392 /* if we have any signaled events, dispatch event */
Paolo Bonzinib4933172014-07-09 11:53:10 +0200393 event = NULL;
394 if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
395 event = events[ret - WAIT_OBJECT_0];
Paolo Bonzinia90d4112014-09-15 14:52:58 +0200396 events[ret - WAIT_OBJECT_0] = events[--count];
Paolo Bonzinib4933172014-07-09 11:53:10 +0200397 } else if (!have_select_revents) {
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200398 break;
399 }
400
Paolo Bonzinib4933172014-07-09 11:53:10 +0200401 have_select_revents = false;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200402 blocking = false;
403
Paolo Bonzinib4933172014-07-09 11:53:10 +0200404 progress |= aio_dispatch_handlers(ctx, event);
Paolo Bonzini6493c972015-07-21 16:07:50 +0200405 } while (count > 0);
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200406
Paolo Bonzinie4c7e2d2014-07-09 11:53:05 +0200407 progress |= timerlistgroup_run_timers(&ctx->tlg);
Alex Bligh438e1f42013-08-21 16:02:53 +0100408
Paolo Bonzini49110172015-02-20 17:26:51 +0100409 aio_context_release(ctx);
Stefan Hajnoczi164a1012013-04-11 16:56:50 +0200410 return progress;
Paolo Bonzinif42b2202012-06-09 04:01:51 +0200411}
Fam Zheng37fcee52015-10-30 12:06:28 +0800412
Cao jin7e003462016-07-15 18:28:44 +0800413void aio_context_setup(AioContext *ctx)
Fam Zheng37fcee52015-10-30 12:06:28 +0800414{
415}
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000416
Stefan Hajnoczi82a41182016-12-01 19:26:51 +0000417void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
418 int64_t grow, int64_t shrink, Error **errp)
Stefan Hajnoczi4a1cba32016-12-01 19:26:42 +0000419{
420 error_setg(errp, "AioContext polling is not implemented on Windows");
421}