diff options
Diffstat (limited to 'hw/virtio/vhost-user.c')
-rw-r--r-- | hw/virtio/vhost-user.c | 1601 |
1 files changed, 1064 insertions, 537 deletions
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 2c8556237f..cdf9af4a4b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -10,7 +10,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "hw/virtio/virtio-dmabuf.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" @@ -20,11 +22,13 @@ #include "sysemu/kvm.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qemu/uuid.h" #include "qemu/sockets.h" +#include "sysemu/runstate.h" #include "sysemu/cryptodev.h" -#include "migration/migration.h" #include "migration/postcopy-ram.h" #include "trace.h" +#include "exec/ramblock.h" #include <sys/ioctl.h> #include <sys/socket.h> @@ -38,19 +42,9 @@ #define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_SLAVE_MAX_FDS 8 +#define VHOST_USER_BACKEND_MAX_FDS 8 -/* - * Set maximum number of RAM slots supported to - * the maximum number supported by the target - * hardware plaform. - */ -#if defined(TARGET_X86) || defined(TARGET_X86_64) || \ - defined(TARGET_ARM) || defined(TARGET_ARM_64) -#include "hw/acpi/acpi.h" -#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS - -#elif defined(TARGET_PPC) || defined(TARGET_PPC_64) +#if defined(TARGET_PPC) || defined(TARGET_PPC64) #include "hw/ppc/spapr.h" #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS @@ -63,26 +57,6 @@ */ #define VHOST_USER_MAX_CONFIG_SIZE 256 -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, - /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ - VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - VHOST_USER_PROTOCOL_F_MAX -}; - #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) typedef enum VhostUserRequest { @@ -107,7 +81,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_SET_BACKEND_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, @@ -125,16 +99,24 @@ typedef enum VhostUserRequest { VHOST_USER_GET_MAX_MEM_SLOTS = 36, VHOST_USER_ADD_MEM_REG = 37, VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_SET_STATUS = 39, + VHOST_USER_GET_STATUS = 40, + VHOST_USER_GET_SHARED_OBJECT = 41, + VHOST_USER_SET_DEVICE_STATE_FD = 42, + VHOST_USER_CHECK_DEVICE_STATE = 43, VHOST_USER_MAX } VhostUserRequest; -typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_MAX -} VhostUserSlaveRequest; +typedef enum VhostUserBackendRequest { + VHOST_USER_BACKEND_NONE = 0, + VHOST_USER_BACKEND_IOTLB_MSG = 1, + VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, + VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, + VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, + VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, + VHOST_USER_BACKEND_MAX +} VhostUserBackendRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; @@ -168,13 +150,24 @@ typedef struct VhostUserConfig { #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 +#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 typedef struct VhostUserCryptoSession { + uint64_t op_code; + union { + struct { + CryptoDevBackendSymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; + uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; + } sym; + struct { + CryptoDevBackendAsymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; + } asym; + } u; + /* session id for success, -1 on errors */ int64_t session_id; - CryptoDevBackendSymSessionInfo session_setup_data; - uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; - uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; } VhostUserCryptoSession; static VhostUserConfig c __attribute__ ((unused)); @@ -195,19 +188,29 @@ typedef struct VhostUserInflight { uint16_t queue_size; } VhostUserInflight; +typedef struct VhostUserShared { + unsigned char uuid[16]; +} VhostUserShared; + typedef struct { VhostUserRequest request; #define VHOST_USER_VERSION_MASK (0x3) -#define VHOST_USER_REPLY_MASK (0x1<<2) +#define VHOST_USER_REPLY_MASK (0x1 << 2) #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) uint32_t flags; uint32_t size; /* the following payload size */ } QEMU_PACKED VhostUserHeader; +/* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */ +typedef struct VhostUserTransferDeviceState { + uint32_t direction; + uint32_t phase; +} VhostUserTransferDeviceState; + typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) uint64_t u64; struct vhost_vring_state state; struct vhost_vring_addr addr; @@ -219,6 +222,8 @@ typedef union { VhostUserCryptoSession session; VhostUserVringArea area; VhostUserInflight inflight; + VhostUserShared object; + VhostUserTransferDeviceState transfer_state; } VhostUserPayload; typedef struct VhostUserMsg { @@ -238,8 +243,8 @@ struct vhost_user { struct vhost_dev *dev; /* Shared between vhost devs of the same virtio device */ VhostUserState *user; - QIOChannel *slave_ioc; - GSource *slave_src; + QIOChannel *backend_ioc; + GSource *backend_src; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; @@ -247,7 +252,8 @@ struct vhost_user { size_t region_rb_len; /* RAMBlock associated with a given region */ RAMBlock **region_rb; - /* The offset from the start of the RAMBlock to the start of the + /* + * The offset from the start of the RAMBlock to the start of the * vhost region. */ ram_addr_t *region_rb_offset; @@ -266,11 +272,6 @@ struct scrub_regions { int fd_idx; }; -static bool ioeventfd_enabled(void) -{ - return !kvm_enabled() || kvm_eventfds_enabled(); -} - static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) { struct vhost_user *u = dev->opaque; @@ -280,9 +281,10 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { + int saved_errno = errno; error_report("Failed to read msg header. Read %d instead of %d." " Original request %d.", r, size, msg->hdr.request); - return -1; + return r < 0 ? -saved_errno : -EIO; } /* validate received flags */ @@ -290,33 +292,24 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) error_report("Failed to read msg header." " Flags 0x%x instead of 0x%x.", msg->hdr.flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); - return -1; + return -EPROTO; } + trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); + return 0; } -struct vhost_user_read_cb_data { - struct vhost_dev *dev; - VhostUserMsg *msg; - GMainLoop *loop; - int ret; -}; - -static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - gpointer opaque) +static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { - struct vhost_user_read_cb_data *data = opaque; - struct vhost_dev *dev = data->dev; - VhostUserMsg *msg = data->msg; struct vhost_user *u = dev->opaque; CharBackend *chr = u->user->chr; uint8_t *p = (uint8_t *) msg; int r, size; - if (vhost_user_read_header(dev, msg) < 0) { - data->ret = -1; - goto end; + r = vhost_user_read_header(dev, msg); + if (r < 0) { + return r; } /* validate message size is sane */ @@ -324,8 +317,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, error_report("Failed to read msg header." " Size %d exceeds the maximum %zu.", msg->hdr.size, VHOST_USER_PAYLOAD_SIZE); - data->ret = -1; - goto end; + return -EPROTO; } if (msg->hdr.size) { @@ -333,112 +325,42 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, size = msg->hdr.size; r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { + int saved_errno = errno; error_report("Failed to read msg payload." " Read %d instead of %d.", r, msg->hdr.size); - data->ret = -1; - goto end; + return r < 0 ? -saved_errno : -EIO; } } -end: - g_main_loop_quit(data->loop); - return G_SOURCE_REMOVE; -} - -static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, - gpointer opaque); - -/* - * This updates the read handler to use a new event loop context. - * Event sources are removed from the previous context : this ensures - * that events detected in the previous context are purged. They will - * be re-detected and processed in the new context. - */ -static void slave_update_read_handler(struct vhost_dev *dev, - GMainContext *ctxt) -{ - struct vhost_user *u = dev->opaque; - - if (!u->slave_ioc) { - return; - } - - if (u->slave_src) { - g_source_destroy(u->slave_src); - g_source_unref(u->slave_src); - } - - u->slave_src = qio_channel_add_watch_source(u->slave_ioc, - G_IO_IN | G_IO_HUP, - slave_read, dev, NULL, - ctxt); -} - -static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) -{ - struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; - GMainContext *prev_ctxt = chr->chr->gcontext; - GMainContext *ctxt = g_main_context_new(); - GMainLoop *loop = g_main_loop_new(ctxt, FALSE); - struct vhost_user_read_cb_data data = { - .dev = dev, - .loop = loop, - .msg = msg, - .ret = 0 - }; - - /* - * We want to be able to monitor the slave channel fd while waiting - * for chr I/O. This requires an event loop, but we can't nest the - * one to which chr is currently attached : its fd handlers might not - * be prepared for re-entrancy. So we create a new one and switch chr - * to use it. - */ - slave_update_read_handler(dev, ctxt); - qemu_chr_be_update_read_handlers(chr->chr, ctxt); - qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); - - g_main_loop_run(loop); - - /* - * Restore the previous event loop context. This also destroys/recreates - * event sources : this guarantees that all pending events in the original - * context that have been processed by the nested loop are purged. - */ - qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); - slave_update_read_handler(dev, NULL); - - g_main_loop_unref(loop); - g_main_context_unref(ctxt); - - return data.ret; + return 0; } static int process_message_reply(struct vhost_dev *dev, const VhostUserMsg *msg) { + int ret; VhostUserMsg msg_reply; if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; + ret = vhost_user_read(dev, &msg_reply); + if (ret < 0) { + return ret; } if (msg_reply.hdr.request != msg->hdr.request) { error_report("Received unexpected msg type. " "Expected %d received %d", msg->hdr.request, msg_reply.hdr.request); - return -1; + return -EPROTO; } - return msg_reply.payload.u64 ? -1 : 0; + return msg_reply.payload.u64 ? -EIO : 0; } -static bool vhost_user_one_time_request(VhostUserRequest request) +static bool vhost_user_per_device_request(VhostUserRequest request) { switch (request) { case VHOST_USER_SET_OWNER: @@ -446,6 +368,9 @@ static bool vhost_user_one_time_request(VhostUserRequest request) case VHOST_USER_SET_MEM_TABLE: case VHOST_USER_GET_QUEUE_NUM: case VHOST_USER_NET_SET_MTU: + case VHOST_USER_RESET_DEVICE: + case VHOST_USER_ADD_MEM_REG: + case VHOST_USER_REM_MEM_REG: return true; default: return false; @@ -461,27 +386,36 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; /* - * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, - * we just need send it once in the first time. For later such - * request, we just ignore it. + * Some devices, like virtio-scsi, are implemented as a single vhost_dev, + * while others, like virtio-net, contain multiple vhost_devs. For + * operations such as configuring device memory mappings or issuing device + * resets, which affect the whole device instead of individual VQs, + * vhost-user messages should only be sent once. + * + * Devices with multiple vhost_devs are given an associated dev->vq_index + * so per_device requests are only sent if vq_index is 0. */ - if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { + if (vhost_user_per_device_request(msg->hdr.request) + && dev->vq_index != 0) { msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { error_report("Failed to set msg fds."); - return -1; + return -EINVAL; } ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); if (ret != size) { + int saved_errno = errno; error_report("Failed to write msg." " Wrote %d instead of %d.", ret, size); - return -1; + return ret < 0 ? -saved_errno : -EIO; } + trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); + return 0; } @@ -502,6 +436,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, size_t fd_num = 0; bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); + int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_LOG_BASE, .hdr.flags = VHOST_USER_VERSION, @@ -510,25 +445,32 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, .hdr.size = sizeof(msg.payload.log), }; + /* Send only once with first queue pair */ + if (dev->vq_index != 0) { + return 0; + } + if (shmfd && log->fd != -1) { fds[fd_num++] = log->fd; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, fds, fd_num); + if (ret < 0) { + return ret; } if (shmfd) { msg.hdr.size = 0; - if (vhost_user_read(dev, &msg) < 0) { - return -1; + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; } if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { error_report("Received unexpected msg type. " "Expected %d received %d", VHOST_USER_SET_LOG_BASE, msg.hdr.request); - return -1; + return -EPROTO; } } @@ -543,6 +485,7 @@ static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, assert((uintptr_t)addr == addr); mr = memory_region_from_host((void *)(uintptr_t)addr, offset); *fd = memory_region_get_fd(mr); + *offset += mr->ram_block->fd_offset; return mr; } @@ -588,7 +531,7 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, u->region_rb[i] = mr->ram_block; } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { error_report("Failed preparing vhost-user memory table msg"); - return -1; + return -ENOBUFS; } vhost_user_fill_msg_region(®ion_buffer, reg, offset); msg->payload.memory.regions[*fd_num] = region_buffer; @@ -604,14 +547,14 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, if (!*fd_num) { error_report("Failed initializing vhost-user memory map, " "consider using -object memory-backend-file share=on"); - return -1; + return -EINVAL; } msg->hdr.size = sizeof(msg->payload.memory.nregions); msg->hdr.size += sizeof(msg->payload.memory.padding); msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); - return 1; + return 0; } static inline bool reg_equal(struct vhost_memory_region *shadow_reg, @@ -741,8 +684,9 @@ static int send_remove_regions(struct vhost_dev *dev, vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); msg->payload.mem_reg.region = region_buffer; - if (vhost_user_write(dev, msg, &fd, 1) < 0) { - return -1; + ret = vhost_user_write(dev, msg, NULL, 0); + if (ret < 0) { + return ret; } if (reply_supported) { @@ -801,15 +745,17 @@ static int send_add_regions(struct vhost_dev *dev, vhost_user_fill_msg_region(®ion_buffer, reg, offset); msg->payload.mem_reg.region = region_buffer; - if (vhost_user_write(dev, msg, &fd, 1) < 0) { - return -1; + ret = vhost_user_write(dev, msg, &fd, 1); + if (ret < 0) { + return ret; } if (track_ramblocks) { uint64_t reply_gpa; - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; + ret = vhost_user_read(dev, &msg_reply); + if (ret < 0) { + return ret; } reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; @@ -819,7 +765,7 @@ static int send_add_regions(struct vhost_dev *dev, "Expected %d received %d", __func__, VHOST_USER_ADD_MEM_REG, msg_reply.hdr.request); - return -1; + return -EPROTO; } /* @@ -830,7 +776,7 @@ static int send_add_regions(struct vhost_dev *dev, error_report("%s: Unexpected size for postcopy reply " "%d vs %d", __func__, msg_reply.hdr.size, msg->hdr.size); - return -1; + return -EPROTO; } /* Get the postcopy client base from the backend's reply. */ @@ -846,7 +792,7 @@ static int send_add_regions(struct vhost_dev *dev, "Got guest physical address %" PRIX64 ", expected " "%" PRIX64, __func__, reply_gpa, dev->mem->regions[reg_idx].guest_phys_addr); - return -1; + return -EPROTO; } } else if (reply_supported) { ret = process_message_reply(dev, msg); @@ -887,6 +833,7 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev, struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; int nr_add_reg, nr_rem_reg; + int ret; msg->hdr.size = sizeof(msg->payload.mem_reg); @@ -894,16 +841,20 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev, scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, shadow_pcb, track_ramblocks); - if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, - reply_supported) < 0) - { - goto err; + if (nr_rem_reg) { + ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, + reply_supported); + if (ret < 0) { + goto err; + } } - if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, - shadow_pcb, reply_supported, track_ramblocks) < 0) - { - goto err; + if (nr_add_reg) { + ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, + reply_supported, track_ramblocks); + if (ret < 0) { + goto err; + } } if (track_ramblocks) { @@ -918,8 +869,9 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev, msg->hdr.size = sizeof(msg->payload.u64); msg->payload.u64 = 0; /* OK */ - if (vhost_user_write(dev, msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, msg, NULL, 0); + if (ret < 0) { + return ret; } } @@ -931,7 +883,7 @@ err: sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); } - return -1; + return ret; } static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, @@ -944,6 +896,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, size_t fd_num = 0; VhostUserMsg msg_reply; int region_i, msg_i; + int ret; VhostUserMsg msg = { .hdr.flags = VHOST_USER_VERSION, @@ -961,29 +914,32 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, } if (config_mem_slots) { - if (vhost_user_add_remove_regions(dev, &msg, reply_supported, - true) < 0) { - return -1; + ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); + if (ret < 0) { + return ret; } } else { - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, - true) < 0) { - return -1; + ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + true); + if (ret < 0) { + return ret; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, fds, fd_num); + if (ret < 0) { + return ret; } - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; + ret = vhost_user_read(dev, &msg_reply); + if (ret < 0) { + return ret; } if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { error_report("%s: Received unexpected msg type." "Expected %d received %d", __func__, VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); - return -1; + return -EPROTO; } /* @@ -994,7 +950,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, error_report("%s: Unexpected size for postcopy reply " "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); - return -1; + return -EPROTO; } memset(u->postcopy_client_bases, 0, @@ -1024,7 +980,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, error_report("%s: postcopy reply not fully consumed " "%d vs %zd", __func__, msg_i, fd_num); - return -1; + return -EIO; } /* @@ -1035,8 +991,9 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, /* TODO: Use this for failure cases as well with a bad value. */ msg.hdr.size = sizeof(msg.payload.u64); msg.payload.u64 = 0; /* OK */ - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } } @@ -1055,6 +1012,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, bool config_mem_slots = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); + int ret; if (do_postcopy) { /* @@ -1074,17 +1032,20 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, } if (config_mem_slots) { - if (vhost_user_add_remove_regions(dev, &msg, reply_supported, - false) < 0) { - return -1; + ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); + if (ret < 0) { + return ret; } } else { - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, - false) < 0) { - return -1; + ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + false); + if (ret < 0) { + return ret; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; + + ret = vhost_user_write(dev, &msg, fds, fd_num); + if (ret < 0) { + return ret; } if (reply_supported) { @@ -1109,11 +1070,92 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev, if (!cross_endian) { error_report("vhost-user trying to send unhandled ioctl"); - return -1; + return -ENOTSUP; + } + + return vhost_user_write(dev, &msg, NULL, 0); +} + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +{ + int ret; + VhostUserMsg msg = { + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (vhost_user_per_device_request(request) && dev->vq_index != 0) { + return 0; + } + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; + } + + if (msg.hdr.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_report("Received bad msg size."); + return -EPROTO; + } + + *u64 = msg.payload.u64; + + return 0; +} + +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +{ + if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; +} + +/* Note: "msg->hdr.flags" may be modified. */ +static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, + bool wait_for_reply) +{ + int ret; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, msg, NULL, 0); + if (ret < 0) { + return ret; + } + + if (wait_for_reply) { + uint64_t dummy; + + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(dev, msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. + */ + return vhost_user_get_features(dev, &dummy); } return 0; @@ -1121,7 +1163,8 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev, static int vhost_set_vring(struct vhost_dev *dev, unsigned long int request, - struct vhost_vring_state *ring) + struct vhost_vring_state *ring, + bool wait_for_reply) { VhostUserMsg msg = { .hdr.request = request, @@ -1130,51 +1173,44 @@ static int vhost_set_vring(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.state), }; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; - } - - return 0; + return vhost_user_write_sync(dev, &msg, wait_for_reply); } static int vhost_user_set_vring_num(struct vhost_dev *dev, struct vhost_vring_state *ring) { - return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false); } -static void vhost_user_host_notifier_restore(struct vhost_dev *dev, - int queue_idx) +static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) { - struct vhost_user *u = dev->opaque; - VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; - VirtIODevice *vdev = dev->vdev; - - if (n->addr && !n->set) { - virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); - n->set = true; - } + assert(n && n->unmap_addr); + munmap(n->unmap_addr, qemu_real_host_page_size()); + n->unmap_addr = NULL; } -static void vhost_user_host_notifier_remove(struct vhost_dev *dev, - int queue_idx) +/* + * clean-up function for notifier, will finally free the structure + * under rcu. + */ +static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, + VirtIODevice *vdev) { - struct vhost_user *u = dev->opaque; - VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; - VirtIODevice *vdev = dev->vdev; - - if (n->addr && n->set) { - virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); - n->set = false; + if (n->addr) { + if (vdev) { + virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); + } + assert(!n->unmap_addr); + n->unmap_addr = n->addr; + n->addr = NULL; + call_rcu(n, vhost_user_host_notifier_free, rcu); } } static int vhost_user_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { - vhost_user_host_notifier_restore(dev, ring->index); - - return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false); } static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) @@ -1182,50 +1218,89 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) int i; if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { - return -1; + return -EINVAL; } for (i = 0; i < dev->nvqs; ++i) { + int ret; struct vhost_vring_state state = { .index = dev->vq_index + i, .num = enable, }; - vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + /* + * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend / + * control plane thread via unix domain socket. Virtio requests travel + * from guest to vhost-user backend / data plane thread via eventfd. + * Even if the guest enables the ring first, and pushes its first virtio + * request second (conforming to the virtio spec), the data plane thread + * in the backend may see the virtio request before the control plane + * thread sees the queue enablement. This causes (in fact, requires) the + * data plane thread to discard the virtio request (it arrived on a + * seemingly disabled queue). To prevent this out-of-order delivery, + * don't let the guest proceed to pushing the virtio request until the + * backend control plane acknowledges enabling the queue -- IOW, pass + * wait_for_reply=true below. + */ + ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true); + if (ret < 0) { + /* + * Restoring the previous state is likely infeasible, as well as + * proceeding regardless the error, so just bail out and hope for + * the device-level recovery. + */ + return ret; + } } return 0; } +static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, + int idx) +{ + if (idx >= u->notifiers->len) { + return NULL; + } + return g_ptr_array_index(u->notifiers, idx); +} + static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { + int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_GET_VRING_BASE, .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, .hdr.size = sizeof(msg.payload.state), }; + struct vhost_user *u = dev->opaque; - vhost_user_host_notifier_remove(dev, ring->index); + VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); + if (n) { + vhost_user_host_notifier_remove(n, dev->vdev); + } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } - if (vhost_user_read(dev, &msg) < 0) { - return -1; + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; } if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { error_report("Received unexpected msg type. Expected %d received %d", VHOST_USER_GET_VRING_BASE, msg.hdr.request); - return -1; + return -EPROTO; } if (msg.hdr.size != sizeof(msg.payload.state)) { error_report("Received bad msg size."); - return -1; + return -EPROTO; } *ring = msg.payload.state; @@ -1246,17 +1321,13 @@ static int vhost_set_vring_file(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.u64), }; - if (ioeventfd_enabled() && file->fd > 0) { + if (file->fd > 0) { fds[fd_num++] = file->fd; } else { msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } - - return 0; + return vhost_user_write(dev, &msg, fds, fd_num); } static int vhost_user_set_vring_kick(struct vhost_dev *dev, @@ -1271,67 +1342,10 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev, return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); } - -static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) -{ - VhostUserMsg msg = { - .hdr.request = request, - .hdr.flags = VHOST_USER_VERSION, - }; - - if (vhost_user_one_time_request(request) && dev->vq_index != 0) { - return 0; - } - - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; - } - - if (vhost_user_read(dev, &msg) < 0) { - return -1; - } - - if (msg.hdr.request != request) { - error_report("Received unexpected msg type. Expected %d received %d", - request, msg.hdr.request); - return -1; - } - - if (msg.hdr.size != sizeof(msg.payload.u64)) { - error_report("Received bad msg size."); - return -1; - } - - *u64 = msg.payload.u64; - - return 0; -} - -static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) -{ - if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { - return -EPROTO; - } - - return 0; -} - -static int enforce_reply(struct vhost_dev *dev, - const VhostUserMsg *msg) +static int vhost_user_set_vring_err(struct vhost_dev *dev, + struct vhost_vring_file *file) { - uint64_t dummy; - - if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - return process_message_reply(dev, msg); - } - - /* - * We need to wait for a reply but the backend does not - * support replies for the command we just sent. - * Send VHOST_USER_GET_FEATURES which makes all backends - * send a reply. - */ - return vhost_user_get_features(dev, &dummy); + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); } static int vhost_user_set_vring_addr(struct vhost_dev *dev, @@ -1344,28 +1358,13 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.addr), }; - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - /* * wait for a reply if logging is enabled to make sure * backend is actually logging changes */ bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); - if (reply_supported && wait_for_reply) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; - } - - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; - } - - if (wait_for_reply) { - return enforce_reply(dev, &msg); - } - - return 0; + return vhost_user_write_sync(dev, &msg, wait_for_reply); } static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, @@ -1378,23 +1377,44 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, .hdr.size = sizeof(msg.payload.u64), }; - if (wait_for_reply) { - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; - } + return vhost_user_write_sync(dev, &msg, wait_for_reply); +} + +static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); +} + +static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) +{ + uint64_t value; + int ret; + + ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); + if (ret < 0) { + return ret; } + *status = value; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + return 0; +} + +static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) +{ + uint8_t s; + int ret; + + ret = vhost_user_get_status(dev, &s); + if (ret < 0) { + return ret; } - if (wait_for_reply) { - return enforce_reply(dev, &msg); + if ((s & status) == status) { + return 0; } + s |= status; - return 0; + return vhost_user_set_status(dev, s); } static int vhost_user_set_features(struct vhost_dev *dev, @@ -1405,9 +1425,26 @@ static int vhost_user_set_features(struct vhost_dev *dev, * backend is actually logging changes */ bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + int ret; - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, + /* + * We need to include any extra backend only feature bits that + * might be needed by our device. Currently this includes the + * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol + * features. + */ + ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, + features | dev->backend_features, log_enabled); + + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + if (!ret) { + return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + } + } + + return ret; } static int vhost_user_set_protocol_features(struct vhost_dev *dev, @@ -1424,11 +1461,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev) .hdr.flags = VHOST_USER_VERSION, }; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -EPROTO; - } - - return 0; + return vhost_user_write(dev, &msg, NULL, 0); } static int vhost_user_get_max_memslots(struct vhost_dev *dev, @@ -1452,41 +1485,64 @@ static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { .hdr.flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_RESET_DEVICE, }; - msg.hdr.request = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_RESET_DEVICE) - ? VHOST_USER_RESET_DEVICE - : VHOST_USER_RESET_OWNER; - - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + /* + * Historically, reset was not implemented so only reset devices + * that are expecting it. + */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { + return -ENOSYS; } - return 0; + return vhost_user_write(dev, &msg, NULL, 0); } -static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) { - int ret = -1; + if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { + return -ENOSYS; + } + + return dev->config_ops->vhost_dev_config_notifier(dev); +} - if (!dev->config_ops) { - return -1; +/* + * Fetch or create the notifier for a given idx. Newly created + * notifiers are added to the pointer array that tracks them. + */ +static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, + int idx) +{ + VhostUserHostNotifier *n = NULL; + if (idx >= u->notifiers->len) { + g_ptr_array_set_size(u->notifiers, idx + 1); } - if (dev->config_ops->vhost_dev_config_notifier) { - ret = dev->config_ops->vhost_dev_config_notifier(dev); + n = g_ptr_array_index(u->notifiers, idx); + if (!n) { + /* + * In case notification arrive out-of-order, + * make room for current index. + */ + g_ptr_array_remove_index(u->notifiers, idx); + n = g_new0(VhostUserHostNotifier, 1); + n->idx = idx; + g_ptr_array_insert(u->notifiers, idx, n); + trace_vhost_user_create_notifier(idx, n); } - return ret; + return n; } -static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) { int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; - size_t page_size = qemu_real_host_page_size; + size_t page_size = qemu_real_host_page_size(); struct vhost_user *u = dev->opaque; VhostUserState *user = u->user; VirtIODevice *vdev = dev->vdev; @@ -1497,17 +1553,15 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { - return -1; + return -EINVAL; } - n = &user->notifier[queue_idx]; - - if (n->addr) { - virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); - object_unparent(OBJECT(&n->mr)); - munmap(n->addr, page_size); - n->addr = NULL; - } + /* + * Fetch notifier and invalidate any old data before setting up + * new mapped address. + */ + n = fetch_or_create_notifier(user, queue_idx); + vhost_user_host_notifier_remove(n, vdev); if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { return 0; @@ -1515,43 +1569,195 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, /* Sanity check. */ if (area->size != page_size) { - return -1; + return -EINVAL; } addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, area->offset); if (addr == MAP_FAILED) { - return -1; + return -EFAULT; } name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", user, queue_idx); - memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, - page_size, addr); + if (!n->mr.ram) { /* Don't init again after suspend. */ + memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, + page_size, addr); + } else { + n->mr.ram_block->host = addr; + } g_free(name); if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { object_unparent(OBJECT(&n->mr)); munmap(addr, page_size); - return -1; + return -ENXIO; } n->addr = addr; - n->set = true; return 0; } -static void close_slave_channel(struct vhost_user *u) +static int +vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, + VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + return virtio_add_vhost_device(&uuid, dev); +} + +static int +vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, + VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + switch (virtio_object_type(&uuid)) { + case TYPE_VHOST_DEV: + { + struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid); + if (dev != owner) { + /* Not allowed to remove non-owned entries */ + return 0; + } + break; + } + default: + /* Not allowed to remove non-owned entries */ + return 0; + } + + return virtio_remove_resource(&uuid); +} + +static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + struct iovec iov[] = { + { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE }, + { .iov_base = payload, .iov_len = hdr->size }, + }; + + hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr->flags |= VHOST_USER_REPLY_MASK; + + return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); +} + +static bool +vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + hdr->size = sizeof(payload->u64); + return vhost_user_send_resp(ioc, hdr, payload, errp); +} + +int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, + int *dmabuf_fd) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_SHARED_OBJECT, + .hdr.flags = VHOST_USER_VERSION, + }; + memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid)); + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; + } + + if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); + return -EPROTO; + } + + *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); + if (*dmabuf_fd < 0) { + error_report("Failed to get dmabuf fd"); + return -EIO; + } + + return 0; +} + +static int +vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, + QIOChannel *ioc, + VhostUserHeader *hdr, + VhostUserPayload *payload) +{ + QemuUUID uuid; + CharBackend *chr = u->user->chr; + Error *local_err = NULL; + int dmabuf_fd = -1; + int fd_num = 0; + + memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); + + payload->u64 = 0; + switch (virtio_object_type(&uuid)) { + case TYPE_DMABUF: + dmabuf_fd = virtio_lookup_dmabuf(&uuid); + break; + case TYPE_VHOST_DEV: + { + struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); + if (dev == NULL) { + payload->u64 = -EINVAL; + break; + } + int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); + if (ret < 0) { + payload->u64 = ret; + } + break; + } + case TYPE_INVALID: + payload->u64 = -EINVAL; + break; + } + + if (dmabuf_fd != -1) { + fd_num++; + } + + if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { + error_report("Failed to set msg fds."); + payload->u64 = -EINVAL; + } + + if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { + error_report_err(local_err); + return -EINVAL; + } + + return 0; +} + +static void close_backend_channel(struct vhost_user *u) { - g_source_destroy(u->slave_src); - g_source_unref(u->slave_src); - u->slave_src = NULL; - object_unref(OBJECT(u->slave_ioc)); - u->slave_ioc = NULL; + g_source_destroy(u->backend_src); + g_source_unref(u->backend_src); + u->backend_src = NULL; + object_unref(OBJECT(u->backend_ioc)); + u->backend_ioc = NULL; } -static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { struct vhost_dev *dev = opaque; @@ -1589,16 +1795,27 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, } switch (hdr.request) { - case VHOST_USER_SLAVE_IOTLB_MSG: + case VHOST_USER_BACKEND_IOTLB_MSG: ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; - case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : - ret = vhost_user_slave_handle_config_change(dev); + case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: + ret = vhost_user_backend_handle_config_change(dev); break; - case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: - ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: + ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; + case VHOST_USER_BACKEND_SHARED_OBJECT_ADD: + ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE: + ret = vhost_user_backend_handle_shared_object_remove(dev, + &payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: + ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, + &hdr, &payload); + break; default: error_report("Received unexpected msg type: %d.", hdr.request); ret = -EINVAL; @@ -1609,21 +1826,10 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, * directly in their request handlers. */ if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - struct iovec iovec[2]; - - - hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; - hdr.flags |= VHOST_USER_REPLY_MASK; - payload.u64 = !!ret; hdr.size = sizeof(payload.u64); - iovec[0].iov_base = &hdr; - iovec[0].iov_len = VHOST_USER_HDR_SIZE; - iovec[1].iov_base = &payload; - iovec[1].iov_len = hdr.size; - - if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { + if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) { error_report_err(local_err); goto err; } @@ -1632,7 +1838,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, goto fdcleanup; err: - close_slave_channel(u); + close_backend_channel(u); rc = G_SOURCE_REMOVE; fdcleanup: @@ -1644,10 +1850,10 @@ fdcleanup: return rc; } -static int vhost_setup_slave_channel(struct vhost_dev *dev) +static int vhost_setup_backend_channel(struct vhost_dev *dev) { VhostUserMsg msg = { - .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, + .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, .hdr.flags = VHOST_USER_VERSION, }; struct vhost_user *u = dev->opaque; @@ -1658,22 +1864,25 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) QIOChannel *ioc; if (!virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { return 0; } - if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + int saved_errno = errno; error_report("socketpair() failed"); - return -1; + return -saved_errno; } ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); if (!ioc) { error_report_err(local_err); - return -1; + return -ECONNREFUSED; } - u->slave_ioc = ioc; - slave_update_read_handler(dev, NULL); + u->backend_ioc = ioc; + u->backend_src = qio_channel_add_watch_source(u->backend_ioc, + G_IO_IN | G_IO_HUP, + backend_read, dev, NULL, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; @@ -1691,7 +1900,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) out: close(sv[1]); if (ret) { - close_slave_channel(u); + close_backend_channel(u); } return ret; @@ -1777,37 +1986,40 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) struct vhost_user *u = dev->opaque; CharBackend *chr = u->user->chr; int ufd; + int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_POSTCOPY_ADVISE, .hdr.flags = VHOST_USER_VERSION, }; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { error_setg(errp, "Failed to send postcopy_advise to vhost"); - return -1; + return ret; } - if (vhost_user_read(dev, &msg) < 0) { + ret = vhost_user_read(dev, &msg); + if (ret < 0) { error_setg(errp, "Failed to get postcopy_advise reply from vhost"); - return -1; + return ret; } if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { error_setg(errp, "Unexpected msg type. Expected %d received %d", VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); - return -1; + return -EPROTO; } if (msg.hdr.size) { error_setg(errp, "Received bad msg size."); - return -1; + return -EPROTO; } ufd = qemu_chr_fe_get_msgfd(chr); if (ufd < 0) { error_setg(errp, "%s: Failed to get ufd", __func__); - return -1; + return -EIO; } - qemu_set_nonblock(ufd); + qemu_socket_set_nonblock(ufd); /* register ufd with userfault thread */ u->postcopy_fd.fd = ufd; @@ -1819,7 +2031,7 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) return 0; #else error_setg(errp, "Postcopy not supported on non-Linux systems"); - return -1; + return -ENOSYS; #endif } @@ -1835,10 +2047,13 @@ static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, }; u->postcopy_listen = true; + trace_vhost_user_postcopy_listen(); - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { error_setg(errp, "Failed to send postcopy_listen to vhost"); - return -1; + return ret; } ret = process_message_reply(dev, &msg); @@ -1863,9 +2078,11 @@ static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) struct vhost_user *u = dev->opaque; trace_vhost_user_postcopy_end_entry(); - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { error_setg(errp, "Failed to send postcopy_end to vhost"); - return -1; + return ret; } ret = process_message_reply(dev, &msg); @@ -1883,7 +2100,7 @@ static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) } static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, - void *opaque) + void *opaque, Error **errp) { struct PostcopyNotifyData *pnd = opaque; struct vhost_user *u = container_of(notifier, struct vhost_user, @@ -1895,20 +2112,20 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_PAGEFAULT)) { /* TODO: Get the device name into this error somehow */ - error_setg(pnd->errp, + error_setg(errp, "vhost-user backend not capable of postcopy"); return -ENOENT; } break; case POSTCOPY_NOTIFY_INBOUND_ADVISE: - return vhost_user_postcopy_advise(dev, pnd->errp); + return vhost_user_postcopy_advise(dev, errp); case POSTCOPY_NOTIFY_INBOUND_LISTEN: - return vhost_user_postcopy_listen(dev, pnd->errp); + return vhost_user_postcopy_listen(dev, errp); case POSTCOPY_NOTIFY_INBOUND_END: - return vhost_user_postcopy_end(dev, pnd->errp); + return vhost_user_postcopy_end(dev, errp); default: /* We ignore notifications we don't know */ @@ -1921,14 +2138,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, Error **errp) { - uint64_t features, protocol_features, ram_slots; + uint64_t features, ram_slots; struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = g_new0(struct vhost_user, 1); - u->user = opaque; + u->user = vus; u->dev = dev; dev->opaque = u; @@ -1939,6 +2157,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + bool supports_f_config = vus->supports_config || + (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); + uint64_t protocol_features; + dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, @@ -1948,19 +2170,32 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, return -EPROTO; } - dev->protocol_features = - protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; - - if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { - /* Don't acknowledge CONFIG feature if device doesn't support it */ - dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); - } else if (!(protocol_features & - (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { - error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " - "but backend does not support it."); - return -EINVAL; + /* + * We will use all the protocol features we support - although + * we suppress F_CONFIG if we know QEMUs internal code can not support + * it. + */ + protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; + + if (supports_f_config) { + if (!virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + error_setg(errp, "vhost-user device expecting " + "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " + "not support it."); + return -EPROTO; + } + } else { + if (virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + warn_report("vhost-user backend supports " + "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); + protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); + } } + /* final set of protocol features */ + dev->protocol_features = protocol_features; err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); @@ -1987,11 +2222,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && !(virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + VHOST_USER_PROTOCOL_F_BACKEND_REQ) && virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " - "slave-req protocol features."); + "backend-req protocol features."); return -EINVAL; } @@ -2027,7 +2262,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (dev->vq_index == 0) { - err = vhost_setup_slave_channel(dev); + err = vhost_setup_backend_channel(dev); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); return -EPROTO; @@ -2057,8 +2292,8 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) close(u->postcopy_fd.fd); u->postcopy_fd.handler = NULL; } - if (u->slave_ioc) { - close_slave_channel(u); + if (u->backend_ioc) { + close_backend_channel(u); } g_free(u->region_rb); u->region_rb = NULL; @@ -2114,20 +2349,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) return vhost_user_write(dev, &msg, NULL, 0); } - return -1; -} - -static bool vhost_user_can_merge(struct vhost_dev *dev, - uint64_t start1, uint64_t size1, - uint64_t start2, uint64_t size2) -{ - ram_addr_t offset; - int mfd, rfd; - - (void)vhost_user_get_mr_data(start1, &offset, &mfd); - (void)vhost_user_get_mr_data(start2, &offset, &rfd); - - return mfd == rfd; + return -ENOTSUP; } static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) @@ -2135,6 +2357,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) VhostUserMsg msg; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + int ret; if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { return 0; @@ -2148,11 +2371,12 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } - /* If reply_ack supported, slave has to ack specified MTU is valid */ + /* If reply_ack supported, backend has to ack specified MTU is valid */ if (reply_supported) { return process_message_reply(dev, &msg); } @@ -2163,6 +2387,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb_msg *imsg) { + int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_IOTLB_MSG, .hdr.size = sizeof(msg.payload.iotlb), @@ -2170,8 +2395,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, .payload.iotlb = *imsg, }; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -EFAULT; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } return process_message_reply(dev, &msg); @@ -2186,6 +2412,7 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, uint32_t config_len, Error **errp) { + int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_GET_CONFIG, .hdr.flags = VHOST_USER_VERSION, @@ -2202,26 +2429,28 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, msg.payload.config.offset = 0; msg.payload.config.size = config_len; - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - error_setg_errno(errp, EPROTO, "vhost_get_config failed"); - return -EPROTO; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_get_config failed"); + return ret; } - if (vhost_user_read(dev, &msg) < 0) { - error_setg_errno(errp, EPROTO, "vhost_get_config failed"); - return -EPROTO; + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_get_config failed"); + return ret; } if (msg.hdr.request != VHOST_USER_GET_CONFIG) { error_setg(errp, "Received unexpected msg type. Expected %d received %d", VHOST_USER_GET_CONFIG, msg.hdr.request); - return -EINVAL; + return -EPROTO; } if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { error_setg(errp, "Received bad msg size."); - return -EINVAL; + return -EPROTO; } memcpy(config, msg.payload.config.region, config_len); @@ -2232,6 +2461,7 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags) { + int ret; uint8_t *p; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); @@ -2244,7 +2474,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIG)) { - return -1; + return -ENOTSUP; } if (reply_supported) { @@ -2252,7 +2482,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, } if (size > VHOST_USER_MAX_CONFIG_SIZE) { - return -1; + return -EINVAL; } msg.payload.config.offset = offset, @@ -2261,8 +2491,9 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, p = msg.payload.config.region; memcpy(p, data, size); - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } if (reply_supported) { @@ -2276,9 +2507,10 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, void *session_info, uint64_t *session_id) { + int ret; bool crypto_session = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); - CryptoDevBackendSymSessionInfo *sess_info = session_info; + CryptoDevBackendSessionInfo *backend_info = session_info; VhostUserMsg msg = { .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, .hdr.flags = VHOST_USER_VERSION, @@ -2289,44 +2521,85 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, if (!crypto_session) { error_report("vhost-user trying to send unhandled ioctl"); - return -1; + return -ENOTSUP; } - memcpy(&msg.payload.session.session_setup_data, sess_info, - sizeof(CryptoDevBackendSymSessionInfo)); - if (sess_info->key_len) { - memcpy(&msg.payload.session.key, sess_info->cipher_key, - sess_info->key_len); - } - if (sess_info->auth_key_len > 0) { - memcpy(&msg.payload.session.auth_key, sess_info->auth_key, - sess_info->auth_key_len); + if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { + CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.asym.session_setup_data, sess, + sizeof(CryptoDevBackendAsymSessionInfo)); + if (sess->keylen) { + keylen = sizeof(msg.payload.session.u.asym.key); + if (sess->keylen > keylen) { + error_report("Unsupported asymmetric key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.asym.key, sess->key, + sess->keylen); + } + } else { + CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.sym.session_setup_data, sess, + sizeof(CryptoDevBackendSymSessionInfo)); + if (sess->key_len) { + keylen = sizeof(msg.payload.session.u.sym.key); + if (sess->key_len > keylen) { + error_report("Unsupported cipher key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, + sess->key_len); + } + + if (sess->auth_key_len > 0) { + keylen = sizeof(msg.payload.session.u.sym.auth_key); + if (sess->auth_key_len > keylen) { + error_report("Unsupported auth key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, + sess->auth_key_len); + } } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - error_report("vhost_user_write() return -1, create session failed"); - return -1; + + msg.payload.session.op_code = backend_info->op_code; + msg.payload.session.session_id = backend_info->session_id; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + error_report("vhost_user_write() return %d, create session failed", + ret); + return ret; } - if (vhost_user_read(dev, &msg) < 0) { - error_report("vhost_user_read() return -1, create session failed"); - return -1; + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_report("vhost_user_read() return %d, create session failed", + ret); + return ret; } if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { error_report("Received unexpected msg type. Expected %d received %d", VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); - return -1; + return -EPROTO; } if (msg.hdr.size != sizeof(msg.payload.session)) { error_report("Received bad msg size."); - return -1; + return -EPROTO; } if (msg.payload.session.session_id < 0) { error_report("Bad session id: %" PRId64 "", msg.payload.session.session_id); - return -1; + return -EINVAL; } *session_id = msg.payload.session.session_id; @@ -2336,6 +2609,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, static int vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) { + int ret; bool crypto_session = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); VhostUserMsg msg = { @@ -2347,25 +2621,22 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) if (!crypto_session) { error_report("vhost-user trying to send unhandled ioctl"); - return -1; + return -ENOTSUP; } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - error_report("vhost_user_write() return -1, close session failed"); - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + error_report("vhost_user_write() return %d, close session failed", + ret); + return ret; } return 0; } -static bool vhost_user_mem_section_filter(struct vhost_dev *dev, - MemoryRegionSection *section) +static bool vhost_user_no_private_memslots(struct vhost_dev *dev) { - bool result; - - result = memory_region_get_fd(section->mr) >= 0; - - return result; + return true; } static int vhost_user_get_inflight_fd(struct vhost_dev *dev, @@ -2374,6 +2645,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev, { void *addr; int fd; + int ret; struct vhost_user *u = dev->opaque; CharBackend *chr = u->user->chr; VhostUserMsg msg = { @@ -2389,24 +2661,26 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev, return 0; } - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; } - if (vhost_user_read(dev, &msg) < 0) { - return -1; + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; } if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { error_report("Received unexpected msg type. " "Expected %d received %d", VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); - return -1; + return -EPROTO; } if (msg.hdr.size != sizeof(msg.payload.inflight)) { error_report("Received bad msg size."); - return -1; + return -EPROTO; } if (!msg.payload.inflight.mmap_size) { @@ -2416,7 +2690,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev, fd = qemu_chr_fe_get_msgfd(chr); if (fd < 0) { error_report("Failed to get mem fd"); - return -1; + return -EIO; } addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, @@ -2425,7 +2699,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev, if (addr == MAP_FAILED) { error_report("Failed to mmap mem fd"); close(fd); - return -1; + return -EFAULT; } inflight->addr = addr; @@ -2455,11 +2729,21 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev, return 0; } - if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { - return -1; - } + return vhost_user_write(dev, &msg, &inflight->fd, 1); +} - return 0; +static void vhost_user_state_destroy(gpointer data) +{ + VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; + if (n) { + vhost_user_host_notifier_remove(n, NULL); + object_unparent(OBJECT(&n->mr)); + /* + * We can't free until vhost_user_host_notifier_remove has + * done it's thing so schedule the free with RCU. + */ + g_free_rcu(n, rcu); + } } bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) @@ -2470,33 +2754,272 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) } user->chr = chr; user->memory_slots = 0; + user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, + &vhost_user_state_destroy); return true; } void vhost_user_cleanup(VhostUserState *user) { - int i; - if (!user->chr) { return; } memory_region_transaction_begin(); - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - if (user->notifier[i].addr) { - object_unparent(OBJECT(&user->notifier[i].mr)); - munmap(user->notifier[i].addr, qemu_real_host_page_size); - user->notifier[i].addr = NULL; - } - } + user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); memory_region_transaction_commit(); user->chr = NULL; } + +typedef struct { + vu_async_close_fn cb; + DeviceState *dev; + CharBackend *cd; + struct vhost_dev *vhost; + IOEventHandler *event_cb; +} VhostAsyncCallback; + +static void vhost_user_async_close_bh(void *opaque) +{ + VhostAsyncCallback *data = opaque; + struct vhost_dev *vhost = data->vhost; + + /* + * If the vhost_dev has been cleared in the meantime there is + * nothing left to do as some other path has completed the + * cleanup. + */ + if (vhost->vdev) { + data->cb(data->dev); + } else if (data->event_cb) { + qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, + NULL, data->dev, NULL, true); + } + + g_free(data); +} + +/* + * We only schedule the work if the machine is running. If suspended + * we want to keep all the in-flight data as is for migration + * purposes. + */ +void vhost_user_async_close(DeviceState *d, + CharBackend *chardev, struct vhost_dev *vhost, + vu_async_close_fn cb, + IOEventHandler *event_cb) +{ + if (!runstate_check(RUN_STATE_SHUTDOWN)) { + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear. + */ + AioContext *ctx = qemu_get_current_aio_context(); + VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); + + /* Save data for the callback */ + data->cb = cb; + data->dev = d; + data->cd = chardev; + data->vhost = vhost; + data->event_cb = event_cb; + + /* Disable any further notifications on the chardev */ + qemu_chr_fe_set_handlers(chardev, + NULL, NULL, NULL, NULL, NULL, NULL, + false); + + aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); + + /* + * Move vhost device to the stopped state. The vhost-user device + * will be clean up and disconnected in BH. This can be useful in + * the vhost migration code. If disconnect was caught there is an + * option for the general vhost code to get the dev state without + * knowing its type (in this case vhost-user). + * + * Note if the vhost device is fully cleared by the time we + * execute the bottom half we won't continue with the cleanup. + */ + vhost->started = false; + } +} + +static int vhost_user_dev_start(struct vhost_dev *dev, bool started) +{ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + return 0; + } + + /* Set device status only for last queue pair */ + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return 0; + } + + if (started) { + return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER | + VIRTIO_CONFIG_S_DRIVER_OK); + } else { + return 0; + } +} + +static void vhost_user_reset_status(struct vhost_dev *dev) +{ + /* Set device status only for last queue pair */ + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return; + } + + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + vhost_user_set_status(dev, 0); + } +} + +static bool vhost_user_supports_device_state(struct vhost_dev *dev) +{ + return virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_DEVICE_STATE); +} + +static int vhost_user_set_device_state_fd(struct vhost_dev *dev, + VhostDeviceStateDirection direction, + VhostDeviceStatePhase phase, + int fd, + int *reply_fd, + Error **errp) +{ + int ret; + struct vhost_user *vu = dev->opaque; + VhostUserMsg msg = { + .hdr = { + .request = VHOST_USER_SET_DEVICE_STATE_FD, + .flags = VHOST_USER_VERSION, + .size = sizeof(msg.payload.transfer_state), + }, + .payload.transfer_state = { + .direction = direction, + .phase = phase, + }, + }; + + *reply_fd = -1; + + if (!vhost_user_supports_device_state(dev)) { + close(fd); + error_setg(errp, "Back-end does not support migration state transfer"); + return -ENOTSUP; + } + + ret = vhost_user_write(dev, &msg, &fd, 1); + close(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to send SET_DEVICE_STATE_FD message"); + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to receive SET_DEVICE_STATE_FD reply"); + return ret; + } + + if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) { + error_setg(errp, + "Received unexpected message type, expected %d, received %d", + VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_setg(errp, + "Received bad message size, expected %zu, received %" PRIu32, + sizeof(msg.payload.u64), msg.hdr.size); + return -EPROTO; + } + + if ((msg.payload.u64 & 0xff) != 0) { + error_setg(errp, "Back-end did not accept migration state transfer"); + return -EIO; + } + + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { + *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr); + if (*reply_fd < 0) { + error_setg(errp, + "Failed to get back-end-provided transfer pipe FD"); + *reply_fd = -1; + return -EIO; + } + } + + return 0; +} + +static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp) +{ + int ret; + VhostUserMsg msg = { + .hdr = { + .request = VHOST_USER_CHECK_DEVICE_STATE, + .flags = VHOST_USER_VERSION, + .size = 0, + }, + }; + + if (!vhost_user_supports_device_state(dev)) { + error_setg(errp, "Back-end does not support migration state transfer"); + return -ENOTSUP; + } + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to send CHECK_DEVICE_STATE message"); + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to receive CHECK_DEVICE_STATE reply"); + return ret; + } + + if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) { + error_setg(errp, + "Received unexpected message type, expected %d, received %d", + VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_setg(errp, + "Received bad message size, expected %zu, received %" PRIu32, + sizeof(msg.payload.u64), msg.hdr.size); + return -EPROTO; + } + + if (msg.payload.u64 != 0) { + error_setg(errp, "Back-end failed to process its internal state"); + return -EIO; + } + + return 0; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_backend_init, .vhost_backend_cleanup = vhost_user_backend_cleanup, .vhost_backend_memslots_limit = vhost_user_memslots_limit, + .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, .vhost_set_log_base = vhost_user_set_log_base, .vhost_set_mem_table = vhost_user_set_mem_table, .vhost_set_vring_addr = vhost_user_set_vring_addr, @@ -2506,6 +3029,7 @@ const VhostOps user_ops = { .vhost_get_vring_base = vhost_user_get_vring_base, .vhost_set_vring_kick = vhost_user_set_vring_kick, .vhost_set_vring_call = vhost_user_set_vring_call, + .vhost_set_vring_err = vhost_user_set_vring_err, .vhost_set_features = vhost_user_set_features, .vhost_get_features = vhost_user_get_features, .vhost_set_owner = vhost_user_set_owner, @@ -2514,7 +3038,6 @@ const VhostOps user_ops = { .vhost_set_vring_enable = vhost_user_set_vring_enable, .vhost_requires_shm_log = vhost_user_requires_shm_log, .vhost_migration_done = vhost_user_migration_done, - .vhost_backend_can_merge = vhost_user_can_merge, .vhost_net_set_mtu = vhost_user_net_set_mtu, .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, @@ -2522,7 +3045,11 @@ const VhostOps user_ops = { .vhost_set_config = vhost_user_set_config, .vhost_crypto_create_session = vhost_user_crypto_create_session, .vhost_crypto_close_session = vhost_user_crypto_close_session, - .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, .vhost_get_inflight_fd = vhost_user_get_inflight_fd, .vhost_set_inflight_fd = vhost_user_set_inflight_fd, + .vhost_dev_start = vhost_user_dev_start, + .vhost_reset_status = vhost_user_reset_status, + .vhost_supports_device_state = vhost_user_supports_device_state, + .vhost_set_device_state_fd = vhost_user_set_device_state_fd, + .vhost_check_device_state = vhost_user_check_device_state, }; |