diff options
-rw-r--r-- | blockdev-nbd.c | 5 | ||||
-rw-r--r-- | hw/misc/vfio.c | 280 | ||||
-rw-r--r-- | include/block/nbd.h | 12 | ||||
-rw-r--r-- | nbd.c | 217 | ||||
-rw-r--r-- | qemu-nbd.c | 1 |
5 files changed, 351 insertions, 164 deletions
diff --git a/blockdev-nbd.c b/blockdev-nbd.c index b60b66d66c..b3a24740b2 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -28,6 +28,7 @@ static void nbd_accept(void *opaque) int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) { + shutdown(fd, 2); close(fd); } } @@ -91,6 +92,10 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable, error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + if (!bdrv_is_inserted(bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + return; + } if (!has_writable) { writable = false; diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 7b279c4f05..aef4c9ce9d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -121,6 +121,7 @@ typedef struct VFIOINTx { typedef struct VFIOMSIVector { EventNotifier interrupt; /* eventfd triggered on interrupt */ + EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */ struct VFIODevice *vdev; /* back pointer to device */ MSIMessage msg; /* cache the MSI message so we know when it changes */ int virq; /* KVM irqchip route for QEMU bypass */ @@ -642,9 +643,9 @@ static void vfio_msi_interrupt(void *opaque) MSIMessage msg; if (vdev->interrupt == VFIO_INT_MSIX) { - msg = msi_get_message(&vdev->pdev, nr); - } else if (vdev->interrupt == VFIO_INT_MSI) { msg = msix_get_message(&vdev->pdev, nr); + } else if (vdev->interrupt == VFIO_INT_MSI) { + msg = msi_get_message(&vdev->pdev, nr); } else { abort(); } @@ -682,10 +683,11 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix) for (i = 0; i < vdev->nr_vectors; i++) { if (!vdev->msi_vectors[i].use) { fds[i] = -1; - continue; + } else if (vdev->msi_vectors[i].virq >= 0) { + fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt); + } else { + fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); } - - fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); } ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); @@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix) return ret; } +static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, + bool msix) +{ + int virq; + + if ((msix && !VFIO_ALLOW_KVM_MSIX) || + (!msix && !VFIO_ALLOW_KVM_MSI) || !msg) { + return; + } + + if (event_notifier_init(&vector->kvm_interrupt, 0)) { + return; + } + + virq = kvm_irqchip_add_msi_route(kvm_state, *msg); + if (virq < 0) { + event_notifier_cleanup(&vector->kvm_interrupt); + return; + } + + if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt, + NULL, virq) < 0) { + kvm_irqchip_release_virq(kvm_state, virq); + event_notifier_cleanup(&vector->kvm_interrupt); + return; + } + + vector->msg = *msg; + vector->virq = virq; +} + +static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) +{ + kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt, + vector->virq); + kvm_irqchip_release_virq(kvm_state, vector->virq); + vector->virq = -1; + event_notifier_cleanup(&vector->kvm_interrupt); +} + +static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg) +{ + kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg); + vector->msg = msg; +} + static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { @@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vdev->host.function, nr); vector = &vdev->msi_vectors[nr]; - vector->vdev = vdev; - vector->use = true; - - msix_vector_use(pdev, nr); - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); + if (!vector->use) { + vector->vdev = vdev; + vector->virq = -1; + if (event_notifier_init(&vector->interrupt, 0)) { + error_report("vfio: Error: event_notifier_init failed"); + } + vector->use = true; + msix_vector_use(pdev, nr); } + qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), + handler, NULL, vector); + /* * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vector->virq = msg && VFIO_ALLOW_KVM_MSIX ? - kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; - if (vector->virq < 0 || - kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, - NULL, vector->virq) < 0) { - if (vector->virq >= 0) { - kvm_irqchip_release_virq(kvm_state, vector->virq); - vector->virq = -1; + if (vector->virq >= 0) { + if (!msg) { + vfio_remove_kvm_msi_virq(vector); + } else { + vfio_update_kvm_msi_virq(vector, *msg); } - qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), - handler, NULL, vector); + } else { + vfio_add_kvm_msi_virq(vector, msg, true); } /* @@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, irq_set->count = 1; pfd = (int32_t *)&irq_set->data; - *pfd = event_notifier_get_fd(&vector->interrupt); + if (vector->virq >= 0) { + *pfd = event_notifier_get_fd(&vector->kvm_interrupt); + } else { + *pfd = event_notifier_get_fd(&vector->interrupt); + } ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); @@ -783,50 +837,41 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; - int argsz; - struct vfio_irq_set *irq_set; - int32_t *pfd; DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, nr); /* - * XXX What's the right thing to do here? This turns off the interrupt - * completely, but do we really just want to switch the interrupt to - * bouncing through userspace and let msix.c drop it? Not sure. + * There are still old guests that mask and unmask vectors on every + * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of + * the KVM setup in place, simply switch VFIO to use the non-bypass + * eventfd. We'll then fire the interrupt through QEMU and the MSI-X + * core will mask the interrupt and set pending bits, allowing it to + * be re-asserted on unmask. Nothing to do if already using QEMU mode. */ - msix_vector_unuse(pdev, nr); - - argsz = sizeof(*irq_set) + sizeof(*pfd); + if (vector->virq >= 0) { + int argsz; + struct vfio_irq_set *irq_set; + int32_t *pfd; - irq_set = g_malloc0(argsz); - irq_set->argsz = argsz; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | - VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = nr; - irq_set->count = 1; - pfd = (int32_t *)&irq_set->data; + argsz = sizeof(*irq_set) + sizeof(*pfd); - *pfd = -1; + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = nr; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; - ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + *pfd = event_notifier_get_fd(&vector->interrupt); - g_free(irq_set); + ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); - if (vector->virq < 0) { - qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), - NULL, NULL, NULL); - } else { - kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt, - vector->virq); - kvm_irqchip_release_virq(kvm_state, vector->virq); - vector->virq = -1; + g_free(irq_set); } - - event_notifier_cleanup(&vector->interrupt); - vector->use = false; } static void vfio_enable_msix(VFIODevice *vdev) @@ -876,28 +921,28 @@ retry: VFIOMSIVector *vector = &vdev->msi_vectors[i]; vector->vdev = vdev; + vector->virq = -1; vector->use = true; if (event_notifier_init(&vector->interrupt, 0)) { error_report("vfio: Error: event_notifier_init failed"); } + qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), + vfio_msi_interrupt, NULL, vector); + vector->msg = msi_get_message(&vdev->pdev, i); /* * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vector->virq = VFIO_ALLOW_KVM_MSI ? - kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1; - if (vector->virq < 0 || - kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, - NULL, vector->virq) < 0) { - qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), - vfio_msi_interrupt, NULL, vector); - } + vfio_add_kvm_msi_virq(vector, &vector->msg, false); } + /* Set interrupt type prior to possible interrupts */ + vdev->interrupt = VFIO_INT_MSI; + ret = vfio_enable_vectors(vdev, false); if (ret) { if (ret < 0) { @@ -910,14 +955,10 @@ retry: for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; if (vector->virq >= 0) { - kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt, - vector->virq); - kvm_irqchip_release_virq(kvm_state, vector->virq); - vector->virq = -1; - } else { - qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), - NULL, NULL, NULL); + vfio_remove_kvm_msi_virq(vector); } + qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), + NULL, NULL, NULL); event_notifier_cleanup(&vector->interrupt); } @@ -929,11 +970,17 @@ retry: } vdev->nr_vectors = 0; + /* + * Failing to setup MSI doesn't really fall within any specification. + * Let's try leaving interrupts disabled and hope the guest figures + * out to fall back to INTx for this device. + */ + error_report("vfio: Error: Failed to enable MSI"); + vdev->interrupt = VFIO_INT_NONE; + return; } - vdev->interrupt = VFIO_INT_MSI; - DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, vdev->nr_vectors); @@ -941,6 +988,20 @@ retry: static void vfio_disable_msi_common(VFIODevice *vdev) { + int i; + + for (i = 0; i < vdev->nr_vectors; i++) { + VFIOMSIVector *vector = &vdev->msi_vectors[i]; + if (vdev->msi_vectors[i].use) { + if (vector->virq >= 0) { + vfio_remove_kvm_msi_virq(vector); + } + qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), + NULL, NULL, NULL); + event_notifier_cleanup(&vector->interrupt); + } + } + g_free(vdev->msi_vectors); vdev->msi_vectors = NULL; vdev->nr_vectors = 0; @@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev) for (i = 0; i < vdev->nr_vectors; i++) { if (vdev->msi_vectors[i].use) { vfio_msix_vector_release(&vdev->pdev, i); + msix_vector_unuse(&vdev->pdev, i); } } @@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev) static void vfio_disable_msi(VFIODevice *vdev) { - int i; - vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); - - for (i = 0; i < vdev->nr_vectors; i++) { - VFIOMSIVector *vector = &vdev->msi_vectors[i]; - - if (!vector->use) { - continue; - } - - if (vector->virq >= 0) { - kvm_irqchip_remove_irqfd_notifier(kvm_state, - &vector->interrupt, vector->virq); - kvm_irqchip_release_virq(kvm_state, vector->virq); - vector->virq = -1; - } else { - qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), - NULL, NULL, NULL); - } - - event_notifier_cleanup(&vector->interrupt); - } - vfio_disable_msi_common(vdev); DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, @@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev) } msg = msi_get_message(&vdev->pdev, i); - - if (msg.address != vector->msg.address || - msg.data != vector->msg.data) { - - DPRINTF("%s(%04x:%02x:%02x.%x) MSI vector %d changed\n", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, i); - - kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg); - vector->msg = msg; - } + vfio_update_kvm_msi_virq(vector, msg); } } @@ -1053,10 +1082,10 @@ static void vfio_bar_write(void *opaque, hwaddr addr, buf.byte = data; break; case 2: - buf.word = cpu_to_le16(data); + buf.word = data; break; case 4: - buf.dword = cpu_to_le32(data); + buf.dword = data; break; default: hw_error("vfio: unsupported write size, %d bytes", size); @@ -1113,10 +1142,10 @@ static uint64_t vfio_bar_read(void *opaque, data = buf.byte; break; case 2: - data = le16_to_cpu(buf.word); + data = buf.word; break; case 4: - data = le32_to_cpu(buf.dword); + data = buf.dword; break; default: hw_error("vfio: unsupported read size, %d bytes", size); @@ -1143,7 +1172,7 @@ static uint64_t vfio_bar_read(void *opaque, static const MemoryRegionOps vfio_bar_ops = { .read = vfio_bar_read, .write = vfio_bar_write, - .endianness = DEVICE_LITTLE_ENDIAN, + .endianness = DEVICE_NATIVE_ENDIAN, }; static void vfio_pci_load_rom(VFIODevice *vdev) @@ -1205,21 +1234,42 @@ static void vfio_pci_load_rom(VFIODevice *vdev) static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) { VFIODevice *vdev = opaque; - uint64_t val = ((uint64_t)1 << (size * 8)) - 1; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + uint64_t data = 0; /* Load the ROM lazily when the guest tries to read it */ if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { vfio_pci_load_rom(vdev); } - memcpy(&val, vdev->rom + addr, + memcpy(&buf, vdev->rom + addr, (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); + switch (size) { + case 1: + data = buf.byte; + break; + case 2: + data = buf.word; + break; + case 4: + data = buf.dword; + break; + default: + hw_error("vfio: unsupported read size, %d bytes", size); + break; + } + DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, addr, size, val); + vdev->host.function, addr, size, data); - return val; + return data; } static void vfio_rom_write(void *opaque, hwaddr addr, @@ -1230,7 +1280,7 @@ static void vfio_rom_write(void *opaque, hwaddr addr, static const MemoryRegionOps vfio_rom_ops = { .read = vfio_rom_read, .write = vfio_rom_write, - .endianness = DEVICE_LITTLE_ENDIAN, + .endianness = DEVICE_NATIVE_ENDIAN, }; static bool vfio_blacklist_opt_rom(VFIODevice *vdev) @@ -4012,7 +4062,7 @@ static void vfio_err_notifier_handler(void *opaque) __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); - vm_stop(RUN_STATE_IO_ERROR); + vm_stop(RUN_STATE_INTERNAL_ERROR); } /* diff --git a/include/block/nbd.h b/include/block/nbd.h index 79502a090b..9e835d2cbb 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -45,6 +45,18 @@ struct nbd_reply { #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ +/* New-style global flags. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ + +/* New-style client flags. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ + +/* Reply types. */ +#define NBD_REP_ACK (1) /* Data sending finished. */ +#define NBD_REP_SERVER (2) /* Export description. */ +#define NBD_REP_ERR_UNSUP ((1 << 31) | 1) /* Unknown option. */ +#define NBD_REP_ERR_INVALID ((1 << 31) | 3) /* Invalid length. */ + #define NBD_CMD_MASK_COMMAND 0x0000ffff #define NBD_CMD_FLAG_FUA (1 << 16) @@ -56,7 +56,11 @@ __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ } while(0) -/* This is all part of the "official" NBD API */ +/* This is all part of the "official" NBD API. + * + * The most up-to-date documentation is available at: + * https://github.com/yoe/nbd/blob/master/doc/proto.txt + */ #define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) @@ -64,6 +68,7 @@ #define NBD_REPLY_MAGIC 0x67446698 #define NBD_OPTS_MAGIC 0x49484156454F5054LL #define NBD_CLIENT_MAGIC 0x0000420281861253LL +#define NBD_REP_MAGIC 0x3e889045565a9LL #define NBD_SET_SOCK _IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -77,7 +82,9 @@ #define NBD_SET_TIMEOUT _IO(0xab, 9) #define NBD_SET_FLAGS _IO(0xab, 10) -#define NBD_OPT_EXPORT_NAME (1 << 0) +#define NBD_OPT_EXPORT_NAME (1) +#define NBD_OPT_ABORT (2) +#define NBD_OPT_LIST (3) /* Definitions for opaque data types */ @@ -215,59 +222,101 @@ static ssize_t write_sync(int fd, void *buffer, size_t size) */ -static int nbd_receive_options(NBDClient *client) +static int nbd_send_rep(int csock, uint32_t type, uint32_t opt) { - int csock = client->sock; - char name[256]; - uint32_t tmp, length; uint64_t magic; - int rc; + uint32_t len; - /* Client sends: - [ 0 .. 3] reserved (0) - [ 4 .. 11] NBD_OPTS_MAGIC - [12 .. 15] NBD_OPT_EXPORT_NAME - [16 .. 19] length - [20 .. xx] export name (length bytes) - */ - - rc = -EINVAL; - if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - LOG("read failed"); - goto fail; + magic = cpu_to_be64(NBD_REP_MAGIC); + if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("write failed (rep magic)"); + return -EINVAL; } - TRACE("Checking reserved"); - if (tmp != 0) { - LOG("Bad reserved received"); - goto fail; + opt = cpu_to_be32(opt); + if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + LOG("write failed (rep opt)"); + return -EINVAL; } - - if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("read failed"); - goto fail; + type = cpu_to_be32(type); + if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) { + LOG("write failed (rep type)"); + return -EINVAL; } - TRACE("Checking reserved"); - if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { - LOG("Bad magic received"); - goto fail; + len = cpu_to_be32(0); + if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + LOG("write failed (rep data length)"); + return -EINVAL; } + return 0; +} - if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - LOG("read failed"); - goto fail; +static int nbd_send_rep_list(int csock, NBDExport *exp) +{ + uint64_t magic, name_len; + uint32_t opt, type, len; + + name_len = strlen(exp->name); + magic = cpu_to_be64(NBD_REP_MAGIC); + if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("write failed (magic)"); + return -EINVAL; + } + opt = cpu_to_be32(NBD_OPT_LIST); + if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + LOG("write failed (opt)"); + return -EINVAL; } - TRACE("Checking option"); - if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) { - LOG("Bad option received"); - goto fail; + type = cpu_to_be32(NBD_REP_SERVER); + if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) { + LOG("write failed (reply type)"); + return -EINVAL; } + len = cpu_to_be32(name_len + sizeof(len)); + if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + LOG("write failed (length)"); + return -EINVAL; + } + len = cpu_to_be32(name_len); + if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + LOG("write failed (length)"); + return -EINVAL; + } + if (write_sync(csock, exp->name, name_len) != name_len) { + LOG("write failed (buffer)"); + return -EINVAL; + } + return 0; +} - if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) { - LOG("read failed"); - goto fail; +static int nbd_handle_list(NBDClient *client, uint32_t length) +{ + int csock; + NBDExport *exp; + + csock = client->sock; + if (length) { + return nbd_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST); } + + /* For each export, send a NBD_REP_SERVER reply. */ + QTAILQ_FOREACH(exp, &exports, next) { + if (nbd_send_rep_list(csock, exp)) { + return -EINVAL; + } + } + /* Finish with a NBD_REP_ACK. */ + return nbd_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST); +} + +static int nbd_handle_export_name(NBDClient *client, uint32_t length) +{ + int rc = -EINVAL, csock = client->sock; + char name[256]; + + /* Client sends: + [20 .. xx] export name (length bytes) + */ TRACE("Checking length"); - length = be32_to_cpu(length); if (length > 255) { LOG("Bad length received"); goto fail; @@ -286,13 +335,81 @@ static int nbd_receive_options(NBDClient *client) QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); - - TRACE("Option negotiation succeeded."); rc = 0; fail: return rc; } +static int nbd_receive_options(NBDClient *client) +{ + while (1) { + int csock = client->sock; + uint32_t tmp, length; + uint64_t magic; + + /* Client sends: + [ 0 .. 3] client flags + [ 4 .. 11] NBD_OPTS_MAGIC + [12 .. 15] NBD option + [16 .. 19] length + ... Rest of request + */ + + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("read failed"); + return -EINVAL; + } + TRACE("Checking client flags"); + tmp = be32_to_cpu(tmp); + if (tmp != 0 && tmp != NBD_FLAG_C_FIXED_NEWSTYLE) { + LOG("Bad client flags received"); + return -EINVAL; + } + + if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("read failed"); + return -EINVAL; + } + TRACE("Checking opts magic"); + if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { + LOG("Bad magic received"); + return -EINVAL; + } + + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("read failed"); + return -EINVAL; + } + + if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) { + LOG("read failed"); + return -EINVAL; + } + length = be32_to_cpu(length); + + TRACE("Checking option"); + switch (be32_to_cpu(tmp)) { + case NBD_OPT_LIST: + if (nbd_handle_list(client, length) < 0) { + return 1; + } + break; + + case NBD_OPT_ABORT: + return -EINVAL; + + case NBD_OPT_EXPORT_NAME: + return nbd_handle_export_name(client, length); + + default: + tmp = be32_to_cpu(tmp); + LOG("Unsupported option 0x%x", tmp); + nbd_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp); + return -EINVAL; + } + } +} + static int nbd_send_negotiate(NBDClient *client) { int csock = client->sock; @@ -333,6 +450,7 @@ static int nbd_send_negotiate(NBDClient *client) cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags); } else { cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC); + cpu_to_be16w((uint16_t *)(buf + 16), NBD_FLAG_FIXED_NEWSTYLE); } if (client->exp) { @@ -346,7 +464,7 @@ static int nbd_send_negotiate(NBDClient *client) goto fail; } rc = nbd_receive_options(client); - if (rc < 0) { + if (rc != 0) { LOG("option negotiation failed"); goto fail; } @@ -1001,6 +1119,7 @@ static void nbd_trip(void *opaque) struct nbd_request request; struct nbd_reply reply; ssize_t ret; + uint32_t command; TRACE("Reading request."); if (client->closing) { @@ -1023,8 +1142,8 @@ static void nbd_trip(void *opaque) reply.error = -ret; goto error_reply; } - - if ((request.from + request.len) > exp->size) { + command = request.type & NBD_CMD_MASK_COMMAND; + if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) { LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 ", Offset: %" PRIu64 "\n", request.from, request.len, @@ -1033,7 +1152,7 @@ static void nbd_trip(void *opaque) goto invalid_request; } - switch (request.type & NBD_CMD_MASK_COMMAND) { + switch (command) { case NBD_CMD_READ: TRACE("Request type is READ"); @@ -1173,7 +1292,7 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock, client->refcount = 1; client->exp = exp; client->sock = csock; - if (nbd_send_negotiate(client) < 0) { + if (nbd_send_negotiate(client)) { g_free(client); return NULL; } diff --git a/qemu-nbd.c b/qemu-nbd.c index ba6043680a..626e5844f9 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -372,6 +372,7 @@ static void nbd_accept(void *opaque) if (nbd_client_new(exp, fd, nbd_client_closed)) { nb_fds++; } else { + shutdown(fd, 2); close(fd); } } |