aboutsummaryrefslogtreecommitdiff
path: root/hw/virtio/vhost-user.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/virtio/vhost-user.c')
-rw-r--r--hw/virtio/vhost-user.c1601
1 files changed, 1064 insertions, 537 deletions
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 2c8556237f..cdf9af4a4b 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -10,7 +10,9 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "hw/virtio/virtio-dmabuf.h"
#include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
@@ -20,11 +22,13 @@
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
+#include "qemu/uuid.h"
#include "qemu/sockets.h"
+#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
-#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
+#include "exec/ramblock.h"
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -38,19 +42,9 @@
#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
-#define VHOST_USER_SLAVE_MAX_FDS 8
+#define VHOST_USER_BACKEND_MAX_FDS 8
-/*
- * Set maximum number of RAM slots supported to
- * the maximum number supported by the target
- * hardware plaform.
- */
-#if defined(TARGET_X86) || defined(TARGET_X86_64) || \
- defined(TARGET_ARM) || defined(TARGET_ARM_64)
-#include "hw/acpi/acpi.h"
-#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS
-
-#elif defined(TARGET_PPC) || defined(TARGET_PPC_64)
+#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
@@ -63,26 +57,6 @@
*/
#define VHOST_USER_MAX_CONFIG_SIZE 256
-enum VhostUserProtocolFeature {
- VHOST_USER_PROTOCOL_F_MQ = 0,
- VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
- VHOST_USER_PROTOCOL_F_RARP = 2,
- VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
- VHOST_USER_PROTOCOL_F_NET_MTU = 4,
- VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
- VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
- VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
- VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
- VHOST_USER_PROTOCOL_F_CONFIG = 9,
- VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
- VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
- VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
- VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
- /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
- VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
- VHOST_USER_PROTOCOL_F_MAX
-};
-
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
@@ -107,7 +81,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SET_MTU = 20,
- VHOST_USER_SET_SLAVE_REQ_FD = 21,
+ VHOST_USER_SET_BACKEND_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_SET_VRING_ENDIAN = 23,
VHOST_USER_GET_CONFIG = 24,
@@ -125,16 +99,24 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_MAX_MEM_SLOTS = 36,
VHOST_USER_ADD_MEM_REG = 37,
VHOST_USER_REM_MEM_REG = 38,
+ VHOST_USER_SET_STATUS = 39,
+ VHOST_USER_GET_STATUS = 40,
+ VHOST_USER_GET_SHARED_OBJECT = 41,
+ VHOST_USER_SET_DEVICE_STATE_FD = 42,
+ VHOST_USER_CHECK_DEVICE_STATE = 43,
VHOST_USER_MAX
} VhostUserRequest;
-typedef enum VhostUserSlaveRequest {
- VHOST_USER_SLAVE_NONE = 0,
- VHOST_USER_SLAVE_IOTLB_MSG = 1,
- VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
- VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
- VHOST_USER_SLAVE_MAX
-} VhostUserSlaveRequest;
+typedef enum VhostUserBackendRequest {
+ VHOST_USER_BACKEND_NONE = 0,
+ VHOST_USER_BACKEND_IOTLB_MSG = 1,
+ VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
+ VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
+ VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
+ VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
+ VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
+ VHOST_USER_BACKEND_MAX
+} VhostUserBackendRequest;
typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
@@ -168,13 +150,24 @@ typedef struct VhostUserConfig {
#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
+#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024
typedef struct VhostUserCryptoSession {
+ uint64_t op_code;
+ union {
+ struct {
+ CryptoDevBackendSymSessionInfo session_setup_data;
+ uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
+ uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
+ } sym;
+ struct {
+ CryptoDevBackendAsymSessionInfo session_setup_data;
+ uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
+ } asym;
+ } u;
+
/* session id for success, -1 on errors */
int64_t session_id;
- CryptoDevBackendSymSessionInfo session_setup_data;
- uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
- uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;
static VhostUserConfig c __attribute__ ((unused));
@@ -195,19 +188,29 @@ typedef struct VhostUserInflight {
uint16_t queue_size;
} VhostUserInflight;
+typedef struct VhostUserShared {
+ unsigned char uuid[16];
+} VhostUserShared;
+
typedef struct {
VhostUserRequest request;
#define VHOST_USER_VERSION_MASK (0x3)
-#define VHOST_USER_REPLY_MASK (0x1<<2)
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
uint32_t flags;
uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;
+/* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */
+typedef struct VhostUserTransferDeviceState {
+ uint32_t direction;
+ uint32_t phase;
+} VhostUserTransferDeviceState;
+
typedef union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
-#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
uint64_t u64;
struct vhost_vring_state state;
struct vhost_vring_addr addr;
@@ -219,6 +222,8 @@ typedef union {
VhostUserCryptoSession session;
VhostUserVringArea area;
VhostUserInflight inflight;
+ VhostUserShared object;
+ VhostUserTransferDeviceState transfer_state;
} VhostUserPayload;
typedef struct VhostUserMsg {
@@ -238,8 +243,8 @@ struct vhost_user {
struct vhost_dev *dev;
/* Shared between vhost devs of the same virtio device */
VhostUserState *user;
- QIOChannel *slave_ioc;
- GSource *slave_src;
+ QIOChannel *backend_ioc;
+ GSource *backend_src;
NotifierWithReturn postcopy_notifier;
struct PostCopyFD postcopy_fd;
uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
@@ -247,7 +252,8 @@ struct vhost_user {
size_t region_rb_len;
/* RAMBlock associated with a given region */
RAMBlock **region_rb;
- /* The offset from the start of the RAMBlock to the start of the
+ /*
+ * The offset from the start of the RAMBlock to the start of the
* vhost region.
*/
ram_addr_t *region_rb_offset;
@@ -266,11 +272,6 @@ struct scrub_regions {
int fd_idx;
};
-static bool ioeventfd_enabled(void)
-{
- return !kvm_enabled() || kvm_eventfds_enabled();
-}
-
static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
struct vhost_user *u = dev->opaque;
@@ -280,9 +281,10 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
+ int saved_errno = errno;
error_report("Failed to read msg header. Read %d instead of %d."
" Original request %d.", r, size, msg->hdr.request);
- return -1;
+ return r < 0 ? -saved_errno : -EIO;
}
/* validate received flags */
@@ -290,33 +292,24 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
error_report("Failed to read msg header."
" Flags 0x%x instead of 0x%x.", msg->hdr.flags,
VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
- return -1;
+ return -EPROTO;
}
+ trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
+
return 0;
}
-struct vhost_user_read_cb_data {
- struct vhost_dev *dev;
- VhostUserMsg *msg;
- GMainLoop *loop;
- int ret;
-};
-
-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
- gpointer opaque)
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
- struct vhost_user_read_cb_data *data = opaque;
- struct vhost_dev *dev = data->dev;
- VhostUserMsg *msg = data->msg;
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
uint8_t *p = (uint8_t *) msg;
int r, size;
- if (vhost_user_read_header(dev, msg) < 0) {
- data->ret = -1;
- goto end;
+ r = vhost_user_read_header(dev, msg);
+ if (r < 0) {
+ return r;
}
/* validate message size is sane */
@@ -324,8 +317,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
error_report("Failed to read msg header."
" Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
- data->ret = -1;
- goto end;
+ return -EPROTO;
}
if (msg->hdr.size) {
@@ -333,112 +325,42 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
size = msg->hdr.size;
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
+ int saved_errno = errno;
error_report("Failed to read msg payload."
" Read %d instead of %d.", r, msg->hdr.size);
- data->ret = -1;
- goto end;
+ return r < 0 ? -saved_errno : -EIO;
}
}
-end:
- g_main_loop_quit(data->loop);
- return G_SOURCE_REMOVE;
-}
-
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
- gpointer opaque);
-
-/*
- * This updates the read handler to use a new event loop context.
- * Event sources are removed from the previous context : this ensures
- * that events detected in the previous context are purged. They will
- * be re-detected and processed in the new context.
- */
-static void slave_update_read_handler(struct vhost_dev *dev,
- GMainContext *ctxt)
-{
- struct vhost_user *u = dev->opaque;
-
- if (!u->slave_ioc) {
- return;
- }
-
- if (u->slave_src) {
- g_source_destroy(u->slave_src);
- g_source_unref(u->slave_src);
- }
-
- u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
- G_IO_IN | G_IO_HUP,
- slave_read, dev, NULL,
- ctxt);
-}
-
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
-{
- struct vhost_user *u = dev->opaque;
- CharBackend *chr = u->user->chr;
- GMainContext *prev_ctxt = chr->chr->gcontext;
- GMainContext *ctxt = g_main_context_new();
- GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
- struct vhost_user_read_cb_data data = {
- .dev = dev,
- .loop = loop,
- .msg = msg,
- .ret = 0
- };
-
- /*
- * We want to be able to monitor the slave channel fd while waiting
- * for chr I/O. This requires an event loop, but we can't nest the
- * one to which chr is currently attached : its fd handlers might not
- * be prepared for re-entrancy. So we create a new one and switch chr
- * to use it.
- */
- slave_update_read_handler(dev, ctxt);
- qemu_chr_be_update_read_handlers(chr->chr, ctxt);
- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
-
- g_main_loop_run(loop);
-
- /*
- * Restore the previous event loop context. This also destroys/recreates
- * event sources : this guarantees that all pending events in the original
- * context that have been processed by the nested loop are purged.
- */
- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
- slave_update_read_handler(dev, NULL);
-
- g_main_loop_unref(loop);
- g_main_context_unref(ctxt);
-
- return data.ret;
+ return 0;
}
static int process_message_reply(struct vhost_dev *dev,
const VhostUserMsg *msg)
{
+ int ret;
VhostUserMsg msg_reply;
if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
return 0;
}
- if (vhost_user_read(dev, &msg_reply) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg_reply);
+ if (ret < 0) {
+ return ret;
}
if (msg_reply.hdr.request != msg->hdr.request) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
msg->hdr.request, msg_reply.hdr.request);
- return -1;
+ return -EPROTO;
}
- return msg_reply.payload.u64 ? -1 : 0;
+ return msg_reply.payload.u64 ? -EIO : 0;
}
-static bool vhost_user_one_time_request(VhostUserRequest request)
+static bool vhost_user_per_device_request(VhostUserRequest request)
{
switch (request) {
case VHOST_USER_SET_OWNER:
@@ -446,6 +368,9 @@ static bool vhost_user_one_time_request(VhostUserRequest request)
case VHOST_USER_SET_MEM_TABLE:
case VHOST_USER_GET_QUEUE_NUM:
case VHOST_USER_NET_SET_MTU:
+ case VHOST_USER_RESET_DEVICE:
+ case VHOST_USER_ADD_MEM_REG:
+ case VHOST_USER_REM_MEM_REG:
return true;
default:
return false;
@@ -461,27 +386,36 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
/*
- * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
- * we just need send it once in the first time. For later such
- * request, we just ignore it.
+ * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
+ * while others, like virtio-net, contain multiple vhost_devs. For
+ * operations such as configuring device memory mappings or issuing device
+ * resets, which affect the whole device instead of individual VQs,
+ * vhost-user messages should only be sent once.
+ *
+ * Devices with multiple vhost_devs are given an associated dev->vq_index
+ * so per_device requests are only sent if vq_index is 0.
*/
- if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
+ if (vhost_user_per_device_request(msg->hdr.request)
+ && dev->vq_index != 0) {
msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
return 0;
}
if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
error_report("Failed to set msg fds.");
- return -1;
+ return -EINVAL;
}
ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
if (ret != size) {
+ int saved_errno = errno;
error_report("Failed to write msg."
" Wrote %d instead of %d.", ret, size);
- return -1;
+ return ret < 0 ? -saved_errno : -EIO;
}
+ trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
+
return 0;
}
@@ -502,6 +436,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
size_t fd_num = 0;
bool shmfd = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+ int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_LOG_BASE,
.hdr.flags = VHOST_USER_VERSION,
@@ -510,25 +445,32 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
.hdr.size = sizeof(msg.payload.log),
};
+ /* Send only once with first queue pair */
+ if (dev->vq_index != 0) {
+ return 0;
+ }
+
if (shmfd && log->fd != -1) {
fds[fd_num++] = log->fd;
}
- if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, fds, fd_num);
+ if (ret < 0) {
+ return ret;
}
if (shmfd) {
msg.hdr.size = 0;
- if (vhost_user_read(dev, &msg) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ return ret;
}
if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
VHOST_USER_SET_LOG_BASE, msg.hdr.request);
- return -1;
+ return -EPROTO;
}
}
@@ -543,6 +485,7 @@ static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
assert((uintptr_t)addr == addr);
mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
*fd = memory_region_get_fd(mr);
+ *offset += mr->ram_block->fd_offset;
return mr;
}
@@ -588,7 +531,7 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
u->region_rb[i] = mr->ram_block;
} else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
error_report("Failed preparing vhost-user memory table msg");
- return -1;
+ return -ENOBUFS;
}
vhost_user_fill_msg_region(&region_buffer, reg, offset);
msg->payload.memory.regions[*fd_num] = region_buffer;
@@ -604,14 +547,14 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
if (!*fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
- return -1;
+ return -EINVAL;
}
msg->hdr.size = sizeof(msg->payload.memory.nregions);
msg->hdr.size += sizeof(msg->payload.memory.padding);
msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
- return 1;
+ return 0;
}
static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
@@ -741,8 +684,9 @@ static int send_remove_regions(struct vhost_dev *dev,
vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
msg->payload.mem_reg.region = region_buffer;
- if (vhost_user_write(dev, msg, &fd, 1) < 0) {
- return -1;
+ ret = vhost_user_write(dev, msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
if (reply_supported) {
@@ -801,15 +745,17 @@ static int send_add_regions(struct vhost_dev *dev,
vhost_user_fill_msg_region(&region_buffer, reg, offset);
msg->payload.mem_reg.region = region_buffer;
- if (vhost_user_write(dev, msg, &fd, 1) < 0) {
- return -1;
+ ret = vhost_user_write(dev, msg, &fd, 1);
+ if (ret < 0) {
+ return ret;
}
if (track_ramblocks) {
uint64_t reply_gpa;
- if (vhost_user_read(dev, &msg_reply) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg_reply);
+ if (ret < 0) {
+ return ret;
}
reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
@@ -819,7 +765,7 @@ static int send_add_regions(struct vhost_dev *dev,
"Expected %d received %d", __func__,
VHOST_USER_ADD_MEM_REG,
msg_reply.hdr.request);
- return -1;
+ return -EPROTO;
}
/*
@@ -830,7 +776,7 @@ static int send_add_regions(struct vhost_dev *dev,
error_report("%s: Unexpected size for postcopy reply "
"%d vs %d", __func__, msg_reply.hdr.size,
msg->hdr.size);
- return -1;
+ return -EPROTO;
}
/* Get the postcopy client base from the backend's reply. */
@@ -846,7 +792,7 @@ static int send_add_regions(struct vhost_dev *dev,
"Got guest physical address %" PRIX64 ", expected "
"%" PRIX64, __func__, reply_gpa,
dev->mem->regions[reg_idx].guest_phys_addr);
- return -1;
+ return -EPROTO;
}
} else if (reply_supported) {
ret = process_message_reply(dev, msg);
@@ -887,6 +833,7 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
int nr_add_reg, nr_rem_reg;
+ int ret;
msg->hdr.size = sizeof(msg->payload.mem_reg);
@@ -894,16 +841,20 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
shadow_pcb, track_ramblocks);
- if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
- reply_supported) < 0)
- {
- goto err;
+ if (nr_rem_reg) {
+ ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
+ reply_supported);
+ if (ret < 0) {
+ goto err;
+ }
}
- if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
- shadow_pcb, reply_supported, track_ramblocks) < 0)
- {
- goto err;
+ if (nr_add_reg) {
+ ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
+ reply_supported, track_ramblocks);
+ if (ret < 0) {
+ goto err;
+ }
}
if (track_ramblocks) {
@@ -918,8 +869,9 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
msg->hdr.size = sizeof(msg->payload.u64);
msg->payload.u64 = 0; /* OK */
- if (vhost_user_write(dev, msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
}
@@ -931,7 +883,7 @@ err:
sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
}
- return -1;
+ return ret;
}
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
@@ -944,6 +896,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
size_t fd_num = 0;
VhostUserMsg msg_reply;
int region_i, msg_i;
+ int ret;
VhostUserMsg msg = {
.hdr.flags = VHOST_USER_VERSION,
@@ -961,29 +914,32 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
}
if (config_mem_slots) {
- if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
- true) < 0) {
- return -1;
+ ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
+ if (ret < 0) {
+ return ret;
}
} else {
- if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
- true) < 0) {
- return -1;
+ ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
+ true);
+ if (ret < 0) {
+ return ret;
}
- if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, fds, fd_num);
+ if (ret < 0) {
+ return ret;
}
- if (vhost_user_read(dev, &msg_reply) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg_reply);
+ if (ret < 0) {
+ return ret;
}
if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
error_report("%s: Received unexpected msg type."
"Expected %d received %d", __func__,
VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
- return -1;
+ return -EPROTO;
}
/*
@@ -994,7 +950,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
error_report("%s: Unexpected size for postcopy reply "
"%d vs %d", __func__, msg_reply.hdr.size,
msg.hdr.size);
- return -1;
+ return -EPROTO;
}
memset(u->postcopy_client_bases, 0,
@@ -1024,7 +980,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
error_report("%s: postcopy reply not fully consumed "
"%d vs %zd",
__func__, msg_i, fd_num);
- return -1;
+ return -EIO;
}
/*
@@ -1035,8 +991,9 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
/* TODO: Use this for failure cases as well with a bad value. */
msg.hdr.size = sizeof(msg.payload.u64);
msg.payload.u64 = 0; /* OK */
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
}
@@ -1055,6 +1012,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
bool config_mem_slots =
virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
+ int ret;
if (do_postcopy) {
/*
@@ -1074,17 +1032,20 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
}
if (config_mem_slots) {
- if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
- false) < 0) {
- return -1;
+ ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
+ if (ret < 0) {
+ return ret;
}
} else {
- if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
- false) < 0) {
- return -1;
+ ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
+ false);
+ if (ret < 0) {
+ return ret;
}
- if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
- return -1;
+
+ ret = vhost_user_write(dev, &msg, fds, fd_num);
+ if (ret < 0) {
+ return ret;
}
if (reply_supported) {
@@ -1109,11 +1070,92 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev,
if (!cross_endian) {
error_report("vhost-user trying to send unhandled ioctl");
- return -1;
+ return -ENOTSUP;
+ }
+
+ return vhost_user_write(dev, &msg, NULL, 0);
+}
+
+static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
+{
+ int ret;
+ VhostUserMsg msg = {
+ .hdr.request = request,
+ .hdr.flags = VHOST_USER_VERSION,
+ };
+
+ if (vhost_user_per_device_request(request) && dev->vq_index != 0) {
+ return 0;
+ }
+
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (msg.hdr.request != request) {
+ error_report("Received unexpected msg type. Expected %d received %d",
+ request, msg.hdr.request);
+ return -EPROTO;
+ }
+
+ if (msg.hdr.size != sizeof(msg.payload.u64)) {
+ error_report("Received bad msg size.");
+ return -EPROTO;
+ }
+
+ *u64 = msg.payload.u64;
+
+ return 0;
+}
+
+static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
+{
+ if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+/* Note: "msg->hdr.flags" may be modified. */
+static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg,
+ bool wait_for_reply)
+{
+ int ret;
+
+ if (wait_for_reply) {
+ bool reply_supported = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_REPLY_ACK);
+ if (reply_supported) {
+ msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+ }
}
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (wait_for_reply) {
+ uint64_t dummy;
+
+ if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
+ return process_message_reply(dev, msg);
+ }
+
+ /*
+ * We need to wait for a reply but the backend does not
+ * support replies for the command we just sent.
+ * Send VHOST_USER_GET_FEATURES which makes all backends
+ * send a reply.
+ */
+ return vhost_user_get_features(dev, &dummy);
}
return 0;
@@ -1121,7 +1163,8 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev,
static int vhost_set_vring(struct vhost_dev *dev,
unsigned long int request,
- struct vhost_vring_state *ring)
+ struct vhost_vring_state *ring,
+ bool wait_for_reply)
{
VhostUserMsg msg = {
.hdr.request = request,
@@ -1130,51 +1173,44 @@ static int vhost_set_vring(struct vhost_dev *dev,
.hdr.size = sizeof(msg.payload.state),
};
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
- }
-
- return 0;
+ return vhost_user_write_sync(dev, &msg, wait_for_reply);
}
static int vhost_user_set_vring_num(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
- return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
+ return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false);
}
-static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
- int queue_idx)
+static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
- struct vhost_user *u = dev->opaque;
- VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
- VirtIODevice *vdev = dev->vdev;
-
- if (n->addr && !n->set) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
- n->set = true;
- }
+ assert(n && n->unmap_addr);
+ munmap(n->unmap_addr, qemu_real_host_page_size());
+ n->unmap_addr = NULL;
}
-static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
- int queue_idx)
+/*
+ * clean-up function for notifier, will finally free the structure
+ * under rcu.
+ */
+static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
+ VirtIODevice *vdev)
{
- struct vhost_user *u = dev->opaque;
- VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
- VirtIODevice *vdev = dev->vdev;
-
- if (n->addr && n->set) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
- n->set = false;
+ if (n->addr) {
+ if (vdev) {
+ virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
+ }
+ assert(!n->unmap_addr);
+ n->unmap_addr = n->addr;
+ n->addr = NULL;
+ call_rcu(n, vhost_user_host_notifier_free, rcu);
}
}
static int vhost_user_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
- vhost_user_host_notifier_restore(dev, ring->index);
-
- return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
+ return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false);
}
static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
@@ -1182,50 +1218,89 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
int i;
if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
- return -1;
+ return -EINVAL;
}
for (i = 0; i < dev->nvqs; ++i) {
+ int ret;
struct vhost_vring_state state = {
.index = dev->vq_index + i,
.num = enable,
};
- vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
+ /*
+ * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend /
+ * control plane thread via unix domain socket. Virtio requests travel
+ * from guest to vhost-user backend / data plane thread via eventfd.
+ * Even if the guest enables the ring first, and pushes its first virtio
+ * request second (conforming to the virtio spec), the data plane thread
+ * in the backend may see the virtio request before the control plane
+ * thread sees the queue enablement. This causes (in fact, requires) the
+ * data plane thread to discard the virtio request (it arrived on a
+ * seemingly disabled queue). To prevent this out-of-order delivery,
+ * don't let the guest proceed to pushing the virtio request until the
+ * backend control plane acknowledges enabling the queue -- IOW, pass
+ * wait_for_reply=true below.
+ */
+ ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true);
+ if (ret < 0) {
+ /*
+ * Restoring the previous state is likely infeasible, as well as
+ * proceeding regardless the error, so just bail out and hope for
+ * the device-level recovery.
+ */
+ return ret;
+ }
}
return 0;
}
+static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
+ int idx)
+{
+ if (idx >= u->notifiers->len) {
+ return NULL;
+ }
+ return g_ptr_array_index(u->notifiers, idx);
+}
+
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
+ int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_GET_VRING_BASE,
.hdr.flags = VHOST_USER_VERSION,
.payload.state = *ring,
.hdr.size = sizeof(msg.payload.state),
};
+ struct vhost_user *u = dev->opaque;
- vhost_user_host_notifier_remove(dev, ring->index);
+ VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
+ if (n) {
+ vhost_user_host_notifier_remove(n, dev->vdev);
+ }
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
- if (vhost_user_read(dev, &msg) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ return ret;
}
if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
error_report("Received unexpected msg type. Expected %d received %d",
VHOST_USER_GET_VRING_BASE, msg.hdr.request);
- return -1;
+ return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.state)) {
error_report("Received bad msg size.");
- return -1;
+ return -EPROTO;
}
*ring = msg.payload.state;
@@ -1246,17 +1321,13 @@ static int vhost_set_vring_file(struct vhost_dev *dev,
.hdr.size = sizeof(msg.payload.u64),
};
- if (ioeventfd_enabled() && file->fd > 0) {
+ if (file->fd > 0) {
fds[fd_num++] = file->fd;
} else {
msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
}
- if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
- return -1;
- }
-
- return 0;
+ return vhost_user_write(dev, &msg, fds, fd_num);
}
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
@@ -1271,67 +1342,10 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev,
return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}
-
-static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
-{
- VhostUserMsg msg = {
- .hdr.request = request,
- .hdr.flags = VHOST_USER_VERSION,
- };
-
- if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
- return 0;
- }
-
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
- }
-
- if (vhost_user_read(dev, &msg) < 0) {
- return -1;
- }
-
- if (msg.hdr.request != request) {
- error_report("Received unexpected msg type. Expected %d received %d",
- request, msg.hdr.request);
- return -1;
- }
-
- if (msg.hdr.size != sizeof(msg.payload.u64)) {
- error_report("Received bad msg size.");
- return -1;
- }
-
- *u64 = msg.payload.u64;
-
- return 0;
-}
-
-static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
-{
- if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
- return -EPROTO;
- }
-
- return 0;
-}
-
-static int enforce_reply(struct vhost_dev *dev,
- const VhostUserMsg *msg)
+static int vhost_user_set_vring_err(struct vhost_dev *dev,
+ struct vhost_vring_file *file)
{
- uint64_t dummy;
-
- if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
- return process_message_reply(dev, msg);
- }
-
- /*
- * We need to wait for a reply but the backend does not
- * support replies for the command we just sent.
- * Send VHOST_USER_GET_FEATURES which makes all backends
- * send a reply.
- */
- return vhost_user_get_features(dev, &dummy);
+ return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
@@ -1344,28 +1358,13 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev,
.hdr.size = sizeof(msg.payload.addr),
};
- bool reply_supported = virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_REPLY_ACK);
-
/*
* wait for a reply if logging is enabled to make sure
* backend is actually logging changes
*/
bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
- if (reply_supported && wait_for_reply) {
- msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
- }
-
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
- }
-
- if (wait_for_reply) {
- return enforce_reply(dev, &msg);
- }
-
- return 0;
+ return vhost_user_write_sync(dev, &msg, wait_for_reply);
}
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
@@ -1378,23 +1377,44 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
.hdr.size = sizeof(msg.payload.u64),
};
- if (wait_for_reply) {
- bool reply_supported = virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_REPLY_ACK);
- if (reply_supported) {
- msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
- }
+ return vhost_user_write_sync(dev, &msg, wait_for_reply);
+}
+
+static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
+{
+ return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
+}
+
+static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
+{
+ uint64_t value;
+ int ret;
+
+ ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
+ if (ret < 0) {
+ return ret;
}
+ *status = value;
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ return 0;
+}
+
+static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
+{
+ uint8_t s;
+ int ret;
+
+ ret = vhost_user_get_status(dev, &s);
+ if (ret < 0) {
+ return ret;
}
- if (wait_for_reply) {
- return enforce_reply(dev, &msg);
+ if ((s & status) == status) {
+ return 0;
}
+ s |= status;
- return 0;
+ return vhost_user_set_status(dev, s);
}
static int vhost_user_set_features(struct vhost_dev *dev,
@@ -1405,9 +1425,26 @@ static int vhost_user_set_features(struct vhost_dev *dev,
* backend is actually logging changes
*/
bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
+ int ret;
- return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features,
+ /*
+ * We need to include any extra backend only feature bits that
+ * might be needed by our device. Currently this includes the
+ * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
+ * features.
+ */
+ ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
+ features | dev->backend_features,
log_enabled);
+
+ if (virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_STATUS)) {
+ if (!ret) {
+ return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+ }
+ }
+
+ return ret;
}
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
@@ -1424,11 +1461,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
.hdr.flags = VHOST_USER_VERSION,
};
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -EPROTO;
- }
-
- return 0;
+ return vhost_user_write(dev, &msg, NULL, 0);
}
static int vhost_user_get_max_memslots(struct vhost_dev *dev,
@@ -1452,41 +1485,64 @@ static int vhost_user_reset_device(struct vhost_dev *dev)
{
VhostUserMsg msg = {
.hdr.flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_RESET_DEVICE,
};
- msg.hdr.request = virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_RESET_DEVICE)
- ? VHOST_USER_RESET_DEVICE
- : VHOST_USER_RESET_OWNER;
-
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ /*
+ * Historically, reset was not implemented so only reset devices
+ * that are expecting it.
+ */
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
+ return -ENOSYS;
}
- return 0;
+ return vhost_user_write(dev, &msg, NULL, 0);
}
-static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
+static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
{
- int ret = -1;
+ if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
+ return -ENOSYS;
+ }
+
+ return dev->config_ops->vhost_dev_config_notifier(dev);
+}
- if (!dev->config_ops) {
- return -1;
+/*
+ * Fetch or create the notifier for a given idx. Newly created
+ * notifiers are added to the pointer array that tracks them.
+ */
+static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
+ int idx)
+{
+ VhostUserHostNotifier *n = NULL;
+ if (idx >= u->notifiers->len) {
+ g_ptr_array_set_size(u->notifiers, idx + 1);
}
- if (dev->config_ops->vhost_dev_config_notifier) {
- ret = dev->config_ops->vhost_dev_config_notifier(dev);
+ n = g_ptr_array_index(u->notifiers, idx);
+ if (!n) {
+ /*
+ * In case notification arrive out-of-order,
+ * make room for current index.
+ */
+ g_ptr_array_remove_index(u->notifiers, idx);
+ n = g_new0(VhostUserHostNotifier, 1);
+ n->idx = idx;
+ g_ptr_array_insert(u->notifiers, idx, n);
+ trace_vhost_user_create_notifier(idx, n);
}
- return ret;
+ return n;
}
-static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
+static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
VhostUserVringArea *area,
int fd)
{
int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
- size_t page_size = qemu_real_host_page_size;
+ size_t page_size = qemu_real_host_page_size();
struct vhost_user *u = dev->opaque;
VhostUserState *user = u->user;
VirtIODevice *vdev = dev->vdev;
@@ -1497,17 +1553,15 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
- return -1;
+ return -EINVAL;
}
- n = &user->notifier[queue_idx];
-
- if (n->addr) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
- object_unparent(OBJECT(&n->mr));
- munmap(n->addr, page_size);
- n->addr = NULL;
- }
+ /*
+ * Fetch notifier and invalidate any old data before setting up
+ * new mapped address.
+ */
+ n = fetch_or_create_notifier(user, queue_idx);
+ vhost_user_host_notifier_remove(n, vdev);
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
return 0;
@@ -1515,43 +1569,195 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
/* Sanity check. */
if (area->size != page_size) {
- return -1;
+ return -EINVAL;
}
addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, area->offset);
if (addr == MAP_FAILED) {
- return -1;
+ return -EFAULT;
}
name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
user, queue_idx);
- memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
- page_size, addr);
+ if (!n->mr.ram) { /* Don't init again after suspend. */
+ memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
+ page_size, addr);
+ } else {
+ n->mr.ram_block->host = addr;
+ }
g_free(name);
if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
object_unparent(OBJECT(&n->mr));
munmap(addr, page_size);
- return -1;
+ return -ENXIO;
}
n->addr = addr;
- n->set = true;
return 0;
}
-static void close_slave_channel(struct vhost_user *u)
+static int
+vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev,
+ VhostUserShared *object)
+{
+ QemuUUID uuid;
+
+ memcpy(uuid.data, object->uuid, sizeof(object->uuid));
+ return virtio_add_vhost_device(&uuid, dev);
+}
+
+static int
+vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev,
+ VhostUserShared *object)
+{
+ QemuUUID uuid;
+
+ memcpy(uuid.data, object->uuid, sizeof(object->uuid));
+ switch (virtio_object_type(&uuid)) {
+ case TYPE_VHOST_DEV:
+ {
+ struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid);
+ if (dev != owner) {
+ /* Not allowed to remove non-owned entries */
+ return 0;
+ }
+ break;
+ }
+ default:
+ /* Not allowed to remove non-owned entries */
+ return 0;
+ }
+
+ return virtio_remove_resource(&uuid);
+}
+
+static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr,
+ VhostUserPayload *payload, Error **errp)
+{
+ struct iovec iov[] = {
+ { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE },
+ { .iov_base = payload, .iov_len = hdr->size },
+ };
+
+ hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK;
+ hdr->flags |= VHOST_USER_REPLY_MASK;
+
+ return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp);
+}
+
+static bool
+vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr,
+ VhostUserPayload *payload, Error **errp)
+{
+ hdr->size = sizeof(payload->u64);
+ return vhost_user_send_resp(ioc, hdr, payload, errp);
+}
+
+int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid,
+ int *dmabuf_fd)
+{
+ struct vhost_user *u = dev->opaque;
+ CharBackend *chr = u->user->chr;
+ int ret;
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_GET_SHARED_OBJECT,
+ .hdr.flags = VHOST_USER_VERSION,
+ };
+ memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid));
+
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) {
+ error_report("Received unexpected msg type. "
+ "Expected %d received %d",
+ VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request);
+ return -EPROTO;
+ }
+
+ *dmabuf_fd = qemu_chr_fe_get_msgfd(chr);
+ if (*dmabuf_fd < 0) {
+ error_report("Failed to get dmabuf fd");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int
+vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
+ QIOChannel *ioc,
+ VhostUserHeader *hdr,
+ VhostUserPayload *payload)
+{
+ QemuUUID uuid;
+ CharBackend *chr = u->user->chr;
+ Error *local_err = NULL;
+ int dmabuf_fd = -1;
+ int fd_num = 0;
+
+ memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid));
+
+ payload->u64 = 0;
+ switch (virtio_object_type(&uuid)) {
+ case TYPE_DMABUF:
+ dmabuf_fd = virtio_lookup_dmabuf(&uuid);
+ break;
+ case TYPE_VHOST_DEV:
+ {
+ struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid);
+ if (dev == NULL) {
+ payload->u64 = -EINVAL;
+ break;
+ }
+ int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd);
+ if (ret < 0) {
+ payload->u64 = ret;
+ }
+ break;
+ }
+ case TYPE_INVALID:
+ payload->u64 = -EINVAL;
+ break;
+ }
+
+ if (dmabuf_fd != -1) {
+ fd_num++;
+ }
+
+ if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) {
+ error_report("Failed to set msg fds.");
+ payload->u64 = -EINVAL;
+ }
+
+ if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void close_backend_channel(struct vhost_user *u)
{
- g_source_destroy(u->slave_src);
- g_source_unref(u->slave_src);
- u->slave_src = NULL;
- object_unref(OBJECT(u->slave_ioc));
- u->slave_ioc = NULL;
+ g_source_destroy(u->backend_src);
+ g_source_unref(u->backend_src);
+ u->backend_src = NULL;
+ object_unref(OBJECT(u->backend_ioc));
+ u->backend_ioc = NULL;
}
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
+static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
{
struct vhost_dev *dev = opaque;
@@ -1589,16 +1795,27 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
}
switch (hdr.request) {
- case VHOST_USER_SLAVE_IOTLB_MSG:
+ case VHOST_USER_BACKEND_IOTLB_MSG:
ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
break;
- case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
- ret = vhost_user_slave_handle_config_change(dev);
+ case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
+ ret = vhost_user_backend_handle_config_change(dev);
break;
- case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
- ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
+ case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
+ ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
fd ? fd[0] : -1);
break;
+ case VHOST_USER_BACKEND_SHARED_OBJECT_ADD:
+ ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object);
+ break;
+ case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE:
+ ret = vhost_user_backend_handle_shared_object_remove(dev,
+ &payload.object);
+ break;
+ case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
+ ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
+ &hdr, &payload);
+ break;
default:
error_report("Received unexpected msg type: %d.", hdr.request);
ret = -EINVAL;
@@ -1609,21 +1826,10 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
* directly in their request handlers.
*/
if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
- struct iovec iovec[2];
-
-
- hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
- hdr.flags |= VHOST_USER_REPLY_MASK;
-
payload.u64 = !!ret;
hdr.size = sizeof(payload.u64);
- iovec[0].iov_base = &hdr;
- iovec[0].iov_len = VHOST_USER_HDR_SIZE;
- iovec[1].iov_base = &payload;
- iovec[1].iov_len = hdr.size;
-
- if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
+ if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) {
error_report_err(local_err);
goto err;
}
@@ -1632,7 +1838,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
goto fdcleanup;
err:
- close_slave_channel(u);
+ close_backend_channel(u);
rc = G_SOURCE_REMOVE;
fdcleanup:
@@ -1644,10 +1850,10 @@ fdcleanup:
return rc;
}
-static int vhost_setup_slave_channel(struct vhost_dev *dev)
+static int vhost_setup_backend_channel(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
+ .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
.hdr.flags = VHOST_USER_VERSION,
};
struct vhost_user *u = dev->opaque;
@@ -1658,22 +1864,25 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
QIOChannel *ioc;
if (!virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+ VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
return 0;
}
- if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ int saved_errno = errno;
error_report("socketpair() failed");
- return -1;
+ return -saved_errno;
}
ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
if (!ioc) {
error_report_err(local_err);
- return -1;
+ return -ECONNREFUSED;
}
- u->slave_ioc = ioc;
- slave_update_read_handler(dev, NULL);
+ u->backend_ioc = ioc;
+ u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
+ G_IO_IN | G_IO_HUP,
+ backend_read, dev, NULL, NULL);
if (reply_supported) {
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
@@ -1691,7 +1900,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
out:
close(sv[1]);
if (ret) {
- close_slave_channel(u);
+ close_backend_channel(u);
}
return ret;
@@ -1777,37 +1986,40 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
int ufd;
+ int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_POSTCOPY_ADVISE,
.hdr.flags = VHOST_USER_VERSION,
};
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
error_setg(errp, "Failed to send postcopy_advise to vhost");
- return -1;
+ return ret;
}
- if (vhost_user_read(dev, &msg) < 0) {
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
error_setg(errp, "Failed to get postcopy_advise reply from vhost");
- return -1;
+ return ret;
}
if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
error_setg(errp, "Unexpected msg type. Expected %d received %d",
VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
- return -1;
+ return -EPROTO;
}
if (msg.hdr.size) {
error_setg(errp, "Received bad msg size.");
- return -1;
+ return -EPROTO;
}
ufd = qemu_chr_fe_get_msgfd(chr);
if (ufd < 0) {
error_setg(errp, "%s: Failed to get ufd", __func__);
- return -1;
+ return -EIO;
}
- qemu_set_nonblock(ufd);
+ qemu_socket_set_nonblock(ufd);
/* register ufd with userfault thread */
u->postcopy_fd.fd = ufd;
@@ -1819,7 +2031,7 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
return 0;
#else
error_setg(errp, "Postcopy not supported on non-Linux systems");
- return -1;
+ return -ENOSYS;
#endif
}
@@ -1835,10 +2047,13 @@ static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
.hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
};
u->postcopy_listen = true;
+
trace_vhost_user_postcopy_listen();
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
error_setg(errp, "Failed to send postcopy_listen to vhost");
- return -1;
+ return ret;
}
ret = process_message_reply(dev, &msg);
@@ -1863,9 +2078,11 @@ static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
struct vhost_user *u = dev->opaque;
trace_vhost_user_postcopy_end_entry();
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
error_setg(errp, "Failed to send postcopy_end to vhost");
- return -1;
+ return ret;
}
ret = process_message_reply(dev, &msg);
@@ -1883,7 +2100,7 @@ static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
}
static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
- void *opaque)
+ void *opaque, Error **errp)
{
struct PostcopyNotifyData *pnd = opaque;
struct vhost_user *u = container_of(notifier, struct vhost_user,
@@ -1895,20 +2112,20 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
/* TODO: Get the device name into this error somehow */
- error_setg(pnd->errp,
+ error_setg(errp,
"vhost-user backend not capable of postcopy");
return -ENOENT;
}
break;
case POSTCOPY_NOTIFY_INBOUND_ADVISE:
- return vhost_user_postcopy_advise(dev, pnd->errp);
+ return vhost_user_postcopy_advise(dev, errp);
case POSTCOPY_NOTIFY_INBOUND_LISTEN:
- return vhost_user_postcopy_listen(dev, pnd->errp);
+ return vhost_user_postcopy_listen(dev, errp);
case POSTCOPY_NOTIFY_INBOUND_END:
- return vhost_user_postcopy_end(dev, pnd->errp);
+ return vhost_user_postcopy_end(dev, errp);
default:
/* We ignore notifications we don't know */
@@ -1921,14 +2138,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
Error **errp)
{
- uint64_t features, protocol_features, ram_slots;
+ uint64_t features, ram_slots;
struct vhost_user *u;
+ VhostUserState *vus = (VhostUserState *) opaque;
int err;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
u = g_new0(struct vhost_user, 1);
- u->user = opaque;
+ u->user = vus;
u->dev = dev;
dev->opaque = u;
@@ -1939,6 +2157,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
}
if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
+ bool supports_f_config = vus->supports_config ||
+ (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
+ uint64_t protocol_features;
+
dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
@@ -1948,19 +2170,32 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
return -EPROTO;
}
- dev->protocol_features =
- protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
-
- if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
- /* Don't acknowledge CONFIG feature if device doesn't support it */
- dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
- } else if (!(protocol_features &
- (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
- error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG "
- "but backend does not support it.");
- return -EINVAL;
+ /*
+ * We will use all the protocol features we support - although
+ * we suppress F_CONFIG if we know QEMUs internal code can not support
+ * it.
+ */
+ protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+ if (supports_f_config) {
+ if (!virtio_has_feature(protocol_features,
+ VHOST_USER_PROTOCOL_F_CONFIG)) {
+ error_setg(errp, "vhost-user device expecting "
+ "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
+ "not support it.");
+ return -EPROTO;
+ }
+ } else {
+ if (virtio_has_feature(protocol_features,
+ VHOST_USER_PROTOCOL_F_CONFIG)) {
+ warn_report("vhost-user backend supports "
+ "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
+ protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+ }
}
+ /* final set of protocol features */
+ dev->protocol_features = protocol_features;
err = vhost_user_set_protocol_features(dev, dev->protocol_features);
if (err < 0) {
error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
@@ -1987,11 +2222,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
!(virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
+ VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
error_setg(errp, "IOMMU support requires reply-ack and "
- "slave-req protocol features.");
+ "backend-req protocol features.");
return -EINVAL;
}
@@ -2027,7 +2262,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
}
if (dev->vq_index == 0) {
- err = vhost_setup_slave_channel(dev);
+ err = vhost_setup_backend_channel(dev);
if (err < 0) {
error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
return -EPROTO;
@@ -2057,8 +2292,8 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev)
close(u->postcopy_fd.fd);
u->postcopy_fd.handler = NULL;
}
- if (u->slave_ioc) {
- close_slave_channel(u);
+ if (u->backend_ioc) {
+ close_backend_channel(u);
}
g_free(u->region_rb);
u->region_rb = NULL;
@@ -2114,20 +2349,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
return vhost_user_write(dev, &msg, NULL, 0);
}
- return -1;
-}
-
-static bool vhost_user_can_merge(struct vhost_dev *dev,
- uint64_t start1, uint64_t size1,
- uint64_t start2, uint64_t size2)
-{
- ram_addr_t offset;
- int mfd, rfd;
-
- (void)vhost_user_get_mr_data(start1, &offset, &mfd);
- (void)vhost_user_get_mr_data(start2, &offset, &rfd);
-
- return mfd == rfd;
+ return -ENOTSUP;
}
static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
@@ -2135,6 +2357,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
VhostUserMsg msg;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
+ int ret;
if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
return 0;
@@ -2148,11 +2371,12 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
- /* If reply_ack supported, slave has to ack specified MTU is valid */
+ /* If reply_ack supported, backend has to ack specified MTU is valid */
if (reply_supported) {
return process_message_reply(dev, &msg);
}
@@ -2163,6 +2387,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
struct vhost_iotlb_msg *imsg)
{
+ int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_IOTLB_MSG,
.hdr.size = sizeof(msg.payload.iotlb),
@@ -2170,8 +2395,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
.payload.iotlb = *imsg,
};
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -EFAULT;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
return process_message_reply(dev, &msg);
@@ -2186,6 +2412,7 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
uint32_t config_len, Error **errp)
{
+ int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_GET_CONFIG,
.hdr.flags = VHOST_USER_VERSION,
@@ -2202,26 +2429,28 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
msg.payload.config.offset = 0;
msg.payload.config.size = config_len;
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- error_setg_errno(errp, EPROTO, "vhost_get_config failed");
- return -EPROTO;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "vhost_get_config failed");
+ return ret;
}
- if (vhost_user_read(dev, &msg) < 0) {
- error_setg_errno(errp, EPROTO, "vhost_get_config failed");
- return -EPROTO;
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "vhost_get_config failed");
+ return ret;
}
if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
error_setg(errp,
"Received unexpected msg type. Expected %d received %d",
VHOST_USER_GET_CONFIG, msg.hdr.request);
- return -EINVAL;
+ return -EPROTO;
}
if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
error_setg(errp, "Received bad msg size.");
- return -EINVAL;
+ return -EPROTO;
}
memcpy(config, msg.payload.config.region, config_len);
@@ -2232,6 +2461,7 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
uint32_t offset, uint32_t size, uint32_t flags)
{
+ int ret;
uint8_t *p;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
@@ -2244,7 +2474,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CONFIG)) {
- return -1;
+ return -ENOTSUP;
}
if (reply_supported) {
@@ -2252,7 +2482,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
}
if (size > VHOST_USER_MAX_CONFIG_SIZE) {
- return -1;
+ return -EINVAL;
}
msg.payload.config.offset = offset,
@@ -2261,8 +2491,9 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
p = msg.payload.config.region;
memcpy(p, data, size);
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
if (reply_supported) {
@@ -2276,9 +2507,10 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
void *session_info,
uint64_t *session_id)
{
+ int ret;
bool crypto_session = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
- CryptoDevBackendSymSessionInfo *sess_info = session_info;
+ CryptoDevBackendSessionInfo *backend_info = session_info;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
.hdr.flags = VHOST_USER_VERSION,
@@ -2289,44 +2521,85 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
if (!crypto_session) {
error_report("vhost-user trying to send unhandled ioctl");
- return -1;
+ return -ENOTSUP;
}
- memcpy(&msg.payload.session.session_setup_data, sess_info,
- sizeof(CryptoDevBackendSymSessionInfo));
- if (sess_info->key_len) {
- memcpy(&msg.payload.session.key, sess_info->cipher_key,
- sess_info->key_len);
- }
- if (sess_info->auth_key_len > 0) {
- memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
- sess_info->auth_key_len);
+ if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
+ CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
+ size_t keylen;
+
+ memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
+ sizeof(CryptoDevBackendAsymSessionInfo));
+ if (sess->keylen) {
+ keylen = sizeof(msg.payload.session.u.asym.key);
+ if (sess->keylen > keylen) {
+ error_report("Unsupported asymmetric key size");
+ return -ENOTSUP;
+ }
+
+ memcpy(&msg.payload.session.u.asym.key, sess->key,
+ sess->keylen);
+ }
+ } else {
+ CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
+ size_t keylen;
+
+ memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
+ sizeof(CryptoDevBackendSymSessionInfo));
+ if (sess->key_len) {
+ keylen = sizeof(msg.payload.session.u.sym.key);
+ if (sess->key_len > keylen) {
+ error_report("Unsupported cipher key size");
+ return -ENOTSUP;
+ }
+
+ memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
+ sess->key_len);
+ }
+
+ if (sess->auth_key_len > 0) {
+ keylen = sizeof(msg.payload.session.u.sym.auth_key);
+ if (sess->auth_key_len > keylen) {
+ error_report("Unsupported auth key size");
+ return -ENOTSUP;
+ }
+
+ memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
+ sess->auth_key_len);
+ }
}
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- error_report("vhost_user_write() return -1, create session failed");
- return -1;
+
+ msg.payload.session.op_code = backend_info->op_code;
+ msg.payload.session.session_id = backend_info->session_id;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ error_report("vhost_user_write() return %d, create session failed",
+ ret);
+ return ret;
}
- if (vhost_user_read(dev, &msg) < 0) {
- error_report("vhost_user_read() return -1, create session failed");
- return -1;
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ error_report("vhost_user_read() return %d, create session failed",
+ ret);
+ return ret;
}
if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
error_report("Received unexpected msg type. Expected %d received %d",
VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
- return -1;
+ return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.session)) {
error_report("Received bad msg size.");
- return -1;
+ return -EPROTO;
}
if (msg.payload.session.session_id < 0) {
error_report("Bad session id: %" PRId64 "",
msg.payload.session.session_id);
- return -1;
+ return -EINVAL;
}
*session_id = msg.payload.session.session_id;
@@ -2336,6 +2609,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
+ int ret;
bool crypto_session = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
VhostUserMsg msg = {
@@ -2347,25 +2621,22 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
if (!crypto_session) {
error_report("vhost-user trying to send unhandled ioctl");
- return -1;
+ return -ENOTSUP;
}
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- error_report("vhost_user_write() return -1, close session failed");
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ error_report("vhost_user_write() return %d, close session failed",
+ ret);
+ return ret;
}
return 0;
}
-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
- MemoryRegionSection *section)
+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
{
- bool result;
-
- result = memory_region_get_fd(section->mr) >= 0;
-
- return result;
+ return true;
}
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
@@ -2374,6 +2645,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
{
void *addr;
int fd;
+ int ret;
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
VhostUserMsg msg = {
@@ -2389,24 +2661,26 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
return 0;
}
- if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
- return -1;
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ return ret;
}
- if (vhost_user_read(dev, &msg) < 0) {
- return -1;
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ return ret;
}
if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
- return -1;
+ return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.inflight)) {
error_report("Received bad msg size.");
- return -1;
+ return -EPROTO;
}
if (!msg.payload.inflight.mmap_size) {
@@ -2416,7 +2690,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
fd = qemu_chr_fe_get_msgfd(chr);
if (fd < 0) {
error_report("Failed to get mem fd");
- return -1;
+ return -EIO;
}
addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
@@ -2425,7 +2699,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
if (addr == MAP_FAILED) {
error_report("Failed to mmap mem fd");
close(fd);
- return -1;
+ return -EFAULT;
}
inflight->addr = addr;
@@ -2455,11 +2729,21 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
return 0;
}
- if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
- return -1;
- }
+ return vhost_user_write(dev, &msg, &inflight->fd, 1);
+}
- return 0;
+static void vhost_user_state_destroy(gpointer data)
+{
+ VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
+ if (n) {
+ vhost_user_host_notifier_remove(n, NULL);
+ object_unparent(OBJECT(&n->mr));
+ /*
+ * We can't free until vhost_user_host_notifier_remove has
+ * done it's thing so schedule the free with RCU.
+ */
+ g_free_rcu(n, rcu);
+ }
}
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
@@ -2470,33 +2754,272 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
}
user->chr = chr;
user->memory_slots = 0;
+ user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
+ &vhost_user_state_destroy);
return true;
}
void vhost_user_cleanup(VhostUserState *user)
{
- int i;
-
if (!user->chr) {
return;
}
memory_region_transaction_begin();
- for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- if (user->notifier[i].addr) {
- object_unparent(OBJECT(&user->notifier[i].mr));
- munmap(user->notifier[i].addr, qemu_real_host_page_size);
- user->notifier[i].addr = NULL;
- }
- }
+ user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
memory_region_transaction_commit();
user->chr = NULL;
}
+
+typedef struct {
+ vu_async_close_fn cb;
+ DeviceState *dev;
+ CharBackend *cd;
+ struct vhost_dev *vhost;
+ IOEventHandler *event_cb;
+} VhostAsyncCallback;
+
+static void vhost_user_async_close_bh(void *opaque)
+{
+ VhostAsyncCallback *data = opaque;
+ struct vhost_dev *vhost = data->vhost;
+
+ /*
+ * If the vhost_dev has been cleared in the meantime there is
+ * nothing left to do as some other path has completed the
+ * cleanup.
+ */
+ if (vhost->vdev) {
+ data->cb(data->dev);
+ } else if (data->event_cb) {
+ qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
+ NULL, data->dev, NULL, true);
+ }
+
+ g_free(data);
+}
+
+/*
+ * We only schedule the work if the machine is running. If suspended
+ * we want to keep all the in-flight data as is for migration
+ * purposes.
+ */
+void vhost_user_async_close(DeviceState *d,
+ CharBackend *chardev, struct vhost_dev *vhost,
+ vu_async_close_fn cb,
+ IOEventHandler *event_cb)
+{
+ if (!runstate_check(RUN_STATE_SHUTDOWN)) {
+ /*
+ * A close event may happen during a read/write, but vhost
+ * code assumes the vhost_dev remains setup, so delay the
+ * stop & clear.
+ */
+ AioContext *ctx = qemu_get_current_aio_context();
+ VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
+
+ /* Save data for the callback */
+ data->cb = cb;
+ data->dev = d;
+ data->cd = chardev;
+ data->vhost = vhost;
+ data->event_cb = event_cb;
+
+ /* Disable any further notifications on the chardev */
+ qemu_chr_fe_set_handlers(chardev,
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ false);
+
+ aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
+
+ /*
+ * Move vhost device to the stopped state. The vhost-user device
+ * will be clean up and disconnected in BH. This can be useful in
+ * the vhost migration code. If disconnect was caught there is an
+ * option for the general vhost code to get the dev state without
+ * knowing its type (in this case vhost-user).
+ *
+ * Note if the vhost device is fully cleared by the time we
+ * execute the bottom half we won't continue with the cleanup.
+ */
+ vhost->started = false;
+ }
+}
+
+static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
+{
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_STATUS)) {
+ return 0;
+ }
+
+ /* Set device status only for last queue pair */
+ if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+ return 0;
+ }
+
+ if (started) {
+ return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+ VIRTIO_CONFIG_S_DRIVER |
+ VIRTIO_CONFIG_S_DRIVER_OK);
+ } else {
+ return 0;
+ }
+}
+
+static void vhost_user_reset_status(struct vhost_dev *dev)
+{
+ /* Set device status only for last queue pair */
+ if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+ return;
+ }
+
+ if (virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_STATUS)) {
+ vhost_user_set_status(dev, 0);
+ }
+}
+
+static bool vhost_user_supports_device_state(struct vhost_dev *dev)
+{
+ return virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_DEVICE_STATE);
+}
+
+static int vhost_user_set_device_state_fd(struct vhost_dev *dev,
+ VhostDeviceStateDirection direction,
+ VhostDeviceStatePhase phase,
+ int fd,
+ int *reply_fd,
+ Error **errp)
+{
+ int ret;
+ struct vhost_user *vu = dev->opaque;
+ VhostUserMsg msg = {
+ .hdr = {
+ .request = VHOST_USER_SET_DEVICE_STATE_FD,
+ .flags = VHOST_USER_VERSION,
+ .size = sizeof(msg.payload.transfer_state),
+ },
+ .payload.transfer_state = {
+ .direction = direction,
+ .phase = phase,
+ },
+ };
+
+ *reply_fd = -1;
+
+ if (!vhost_user_supports_device_state(dev)) {
+ close(fd);
+ error_setg(errp, "Back-end does not support migration state transfer");
+ return -ENOTSUP;
+ }
+
+ ret = vhost_user_write(dev, &msg, &fd, 1);
+ close(fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to send SET_DEVICE_STATE_FD message");
+ return ret;
+ }
+
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to receive SET_DEVICE_STATE_FD reply");
+ return ret;
+ }
+
+ if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) {
+ error_setg(errp,
+ "Received unexpected message type, expected %d, received %d",
+ VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request);
+ return -EPROTO;
+ }
+
+ if (msg.hdr.size != sizeof(msg.payload.u64)) {
+ error_setg(errp,
+ "Received bad message size, expected %zu, received %" PRIu32,
+ sizeof(msg.payload.u64), msg.hdr.size);
+ return -EPROTO;
+ }
+
+ if ((msg.payload.u64 & 0xff) != 0) {
+ error_setg(errp, "Back-end did not accept migration state transfer");
+ return -EIO;
+ }
+
+ if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr);
+ if (*reply_fd < 0) {
+ error_setg(errp,
+ "Failed to get back-end-provided transfer pipe FD");
+ *reply_fd = -1;
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp)
+{
+ int ret;
+ VhostUserMsg msg = {
+ .hdr = {
+ .request = VHOST_USER_CHECK_DEVICE_STATE,
+ .flags = VHOST_USER_VERSION,
+ .size = 0,
+ },
+ };
+
+ if (!vhost_user_supports_device_state(dev)) {
+ error_setg(errp, "Back-end does not support migration state transfer");
+ return -ENOTSUP;
+ }
+
+ ret = vhost_user_write(dev, &msg, NULL, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to send CHECK_DEVICE_STATE message");
+ return ret;
+ }
+
+ ret = vhost_user_read(dev, &msg);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Failed to receive CHECK_DEVICE_STATE reply");
+ return ret;
+ }
+
+ if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) {
+ error_setg(errp,
+ "Received unexpected message type, expected %d, received %d",
+ VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request);
+ return -EPROTO;
+ }
+
+ if (msg.hdr.size != sizeof(msg.payload.u64)) {
+ error_setg(errp,
+ "Received bad message size, expected %zu, received %" PRIu32,
+ sizeof(msg.payload.u64), msg.hdr.size);
+ return -EPROTO;
+ }
+
+ if (msg.payload.u64 != 0) {
+ error_setg(errp, "Back-end failed to process its internal state");
+ return -EIO;
+ }
+
+ return 0;
+}
+
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_backend_init = vhost_user_backend_init,
.vhost_backend_cleanup = vhost_user_backend_cleanup,
.vhost_backend_memslots_limit = vhost_user_memslots_limit,
+ .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
.vhost_set_log_base = vhost_user_set_log_base,
.vhost_set_mem_table = vhost_user_set_mem_table,
.vhost_set_vring_addr = vhost_user_set_vring_addr,
@@ -2506,6 +3029,7 @@ const VhostOps user_ops = {
.vhost_get_vring_base = vhost_user_get_vring_base,
.vhost_set_vring_kick = vhost_user_set_vring_kick,
.vhost_set_vring_call = vhost_user_set_vring_call,
+ .vhost_set_vring_err = vhost_user_set_vring_err,
.vhost_set_features = vhost_user_set_features,
.vhost_get_features = vhost_user_get_features,
.vhost_set_owner = vhost_user_set_owner,
@@ -2514,7 +3038,6 @@ const VhostOps user_ops = {
.vhost_set_vring_enable = vhost_user_set_vring_enable,
.vhost_requires_shm_log = vhost_user_requires_shm_log,
.vhost_migration_done = vhost_user_migration_done,
- .vhost_backend_can_merge = vhost_user_can_merge,
.vhost_net_set_mtu = vhost_user_net_set_mtu,
.vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
.vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
@@ -2522,7 +3045,11 @@ const VhostOps user_ops = {
.vhost_set_config = vhost_user_set_config,
.vhost_crypto_create_session = vhost_user_crypto_create_session,
.vhost_crypto_close_session = vhost_user_crypto_close_session,
- .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
.vhost_get_inflight_fd = vhost_user_get_inflight_fd,
.vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+ .vhost_dev_start = vhost_user_dev_start,
+ .vhost_reset_status = vhost_user_reset_status,
+ .vhost_supports_device_state = vhost_user_supports_device_state,
+ .vhost_set_device_state_fd = vhost_user_set_device_state_fd,
+ .vhost_check_device_state = vhost_user_check_device_state,
};