memory: emulate ioeventfd

The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
turn guest MMIO/PIO writes into eventfd file descriptor events.  This
allows arbitrary threads to be notified when the guest writes to a
specific MMIO/PIO address.

qtest and TCG do not support ioeventfd because memory writes are not
checked against registered ioeventfds in QEMU.  This patch implements
this in memory_region_dispatch_write() so qtest can use ioeventfd.

Also this patch fixes vhost aborting on some misconfigured old kernels
like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
in expert settings, while MMIO binding support in KVM will still be
missing.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Message-Id: <006e01d12377$0b9c2d40$22d487c0$@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/memory.c b/memory.c
index af08cf1..1c1c192 100644
--- a/memory.c
+++ b/memory.c
@@ -18,12 +18,14 @@
 #include "exec/ioport.h"
 #include "qapi/visitor.h"
 #include "qemu/bitops.h"
+#include "qemu/error-report.h"
 #include "qom/object.h"
 #include "trace.h"
 #include <assert.h>
 
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
+#include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
 
 //#define DEBUG_UNASSIGNED
@@ -1136,6 +1138,32 @@
     return r;
 }
 
+/* Return true if an eventfd was signalled */
+static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
+                                                    hwaddr addr,
+                                                    uint64_t data,
+                                                    unsigned size,
+                                                    MemTxAttrs attrs)
+{
+    MemoryRegionIoeventfd ioeventfd = {
+        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
+        .data = data,
+    };
+    unsigned i;
+
+    for (i = 0; i < mr->ioeventfd_nb; i++) {
+        ioeventfd.match_data = mr->ioeventfds[i].match_data;
+        ioeventfd.e = mr->ioeventfds[i].e;
+
+        if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
+            event_notifier_set(ioeventfd.e);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
@@ -1149,6 +1177,11 @@
 
     adjust_endianness(mr, &data, size);
 
+    if ((!kvm_eventfds_enabled()) &&
+        memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
+        return MEMTX_OK;
+    }
+
     if (mr->ops->write) {
         return access_with_adjusted_size(addr, &data, size,
                                          mr->ops->impl.min_access_size,
@@ -1667,6 +1700,8 @@
     mr->global_locking = false;
 }
 
+static bool userspace_eventfd_warning;
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
@@ -1683,6 +1718,13 @@
     };
     unsigned i;
 
+    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
+                            userspace_eventfd_warning))) {
+        userspace_eventfd_warning = true;
+        error_report("Using eventfd without MMIO binding in KVM. "
+                     "Suboptimal performance expected");
+    }
+
     if (size) {
         adjust_endianness(mr, &mrfd.data, size);
     }