about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- blockdev-nbd.c 5
-rw-r--r-- hw/misc/vfio.c 280
-rw-r--r-- include/block/nbd.h 12
-rw-r--r-- nbd.c 217
-rw-r--r-- qemu-nbd.c 1
5 files changed, 351 insertions, 164 deletions
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index b60b66d66c..b3a24740b2 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -28,6 +28,7 @@ static void nbd_accept(void *opaque)
int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) {
+ shutdown(fd, 2);
close(fd);
}
}
@@ -91,6 +92,10 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
error_set(errp, QERR_DEVICE_NOT_FOUND, device);
return;
}
+ if (!bdrv_is_inserted(bs)) {
+ error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ return;
+ }
if (!has_writable) {
writable = false;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 7b279c4f05..aef4c9ce9d 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -121,6 +121,7 @@ typedef struct VFIOINTx {
typedef struct VFIOMSIVector {
EventNotifier interrupt; /* eventfd triggered on interrupt */
+ EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */
struct VFIODevice *vdev; /* back pointer to device */
MSIMessage msg; /* cache the MSI message so we know when it changes */
int virq; /* KVM irqchip route for QEMU bypass */
@@ -642,9 +643,9 @@ static void vfio_msi_interrupt(void *opaque)
MSIMessage msg;
if (vdev->interrupt == VFIO_INT_MSIX) {
- msg = msi_get_message(&vdev->pdev, nr);
- } else if (vdev->interrupt == VFIO_INT_MSI) {
msg = msix_get_message(&vdev->pdev, nr);
+ } else if (vdev->interrupt == VFIO_INT_MSI) {
+ msg = msi_get_message(&vdev->pdev, nr);
} else {
abort();
}
@@ -682,10 +683,11 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
for (i = 0; i < vdev->nr_vectors; i++) {
if (!vdev->msi_vectors[i].use) {
fds[i] = -1;
- continue;
+ } else if (vdev->msi_vectors[i].virq >= 0) {
+ fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
+ } else {
+ fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
}
-
- fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
}
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
return ret;
}
+static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
+ bool msix)
+{
+ int virq;
+
+ if ((msix && !VFIO_ALLOW_KVM_MSIX) ||
+ (!msix && !VFIO_ALLOW_KVM_MSI) || !msg) {
+ return;
+ }
+
+ if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ return;
+ }
+
+ virq = kvm_irqchip_add_msi_route(kvm_state, *msg);
+ if (virq < 0) {
+ event_notifier_cleanup(&vector->kvm_interrupt);
+ return;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+ NULL, virq) < 0) {
+ kvm_irqchip_release_virq(kvm_state, virq);
+ event_notifier_cleanup(&vector->kvm_interrupt);
+ return;
+ }
+
+ vector->msg = *msg;
+ vector->virq = virq;
+}
+
+static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
+{
+ kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+ vector->virq);
+ kvm_irqchip_release_virq(kvm_state, vector->virq);
+ vector->virq = -1;
+ event_notifier_cleanup(&vector->kvm_interrupt);
+}
+
+static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg)
+{
+ kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
+ vector->msg = msg;
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vdev->host.function, nr);
vector = &vdev->msi_vectors[nr];
- vector->vdev = vdev;
- vector->use = true;
-
- msix_vector_use(pdev, nr);
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
+ if (!vector->use) {
+ vector->vdev = vdev;
+ vector->virq = -1;
+ if (event_notifier_init(&vector->interrupt, 0)) {
+ error_report("vfio: Error: event_notifier_init failed");
+ }
+ vector->use = true;
+ msix_vector_use(pdev, nr);
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ handler, NULL, vector);
+
/*
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = msg && VFIO_ALLOW_KVM_MSIX ?
- kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
- if (vector->virq < 0 ||
- kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
- NULL, vector->virq) < 0) {
- if (vector->virq >= 0) {
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
+ if (vector->virq >= 0) {
+ if (!msg) {
+ vfio_remove_kvm_msi_virq(vector);
+ } else {
+ vfio_update_kvm_msi_virq(vector, *msg);
}
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- handler, NULL, vector);
+ } else {
+ vfio_add_kvm_msi_virq(vector, msg, true);
}
/*
@@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
irq_set->count = 1;
pfd = (int32_t *)&irq_set->data;
- *pfd = event_notifier_get_fd(&vector->interrupt);
+ if (vector->virq >= 0) {
+ *pfd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ *pfd = event_notifier_get_fd(&vector->interrupt);
+ }
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
g_free(irq_set);
@@ -783,50 +837,41 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr];
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, nr);
/*
- * XXX What's the right thing to do here? This turns off the interrupt
- * completely, but do we really just want to switch the interrupt to
- * bouncing through userspace and let msix.c drop it? Not sure.
+ * There are still old guests that mask and unmask vectors on every
+ * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
+ * the KVM setup in place, simply switch VFIO to use the non-bypass
+ * eventfd. We'll then fire the interrupt through QEMU and the MSI-X
+ * core will mask the interrupt and set pending bits, allowing it to
+ * be re-asserted on unmask. Nothing to do if already using QEMU mode.
*/
- msix_vector_unuse(pdev, nr);
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
+ if (vector->virq >= 0) {
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
- *pfd = -1;
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = nr;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
- ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ *pfd = event_notifier_get_fd(&vector->interrupt);
- g_free(irq_set);
+ ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (vector->virq < 0) {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
- } else {
- kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
- vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
+ g_free(irq_set);
}
-
- event_notifier_cleanup(&vector->interrupt);
- vector->use = false;
}
static void vfio_enable_msix(VFIODevice *vdev)
@@ -876,28 +921,28 @@ retry:
VFIOMSIVector *vector = &vdev->msi_vectors[i];
vector->vdev = vdev;
+ vector->virq = -1;
vector->use = true;
if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed");
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ vfio_msi_interrupt, NULL, vector);
+
vector->msg = msi_get_message(&vdev->pdev, i);
/*
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = VFIO_ALLOW_KVM_MSI ?
- kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1;
- if (vector->virq < 0 ||
- kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
- NULL, vector->virq) < 0) {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- vfio_msi_interrupt, NULL, vector);
- }
+ vfio_add_kvm_msi_virq(vector, &vector->msg, false);
}
+ /* Set interrupt type prior to possible interrupts */
+ vdev->interrupt = VFIO_INT_MSI;
+
ret = vfio_enable_vectors(vdev, false);
if (ret) {
if (ret < 0) {
@@ -910,14 +955,10 @@ retry:
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vector->virq >= 0) {
- kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
- vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
- } else {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
+ vfio_remove_kvm_msi_virq(vector);
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ NULL, NULL, NULL);
event_notifier_cleanup(&vector->interrupt);
}
@@ -929,11 +970,17 @@ retry:
}
vdev->nr_vectors = 0;
+ /*
+ * Failing to set up MSI doesn't really fall within any specification.
+ * Let's try leaving interrupts disabled and hope the guest figures
+ * out to fall back to INTx for this device.
+ */
+ error_report("vfio: Error: Failed to enable MSI");
+ vdev->interrupt = VFIO_INT_NONE;
+
return;
}
- vdev->interrupt = VFIO_INT_MSI;
-
DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, vdev->nr_vectors);
@@ -941,6 +988,20 @@ retry:
static void vfio_disable_msi_common(VFIODevice *vdev)
{
+ int i;
+
+ for (i = 0; i < vdev->nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ if (vdev->msi_vectors[i].use) {
+ if (vector->virq >= 0) {
+ vfio_remove_kvm_msi_virq(vector);
+ }
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ NULL, NULL, NULL);
+ event_notifier_cleanup(&vector->interrupt);
+ }
+ }
+
g_free(vdev->msi_vectors);
vdev->msi_vectors = NULL;
vdev->nr_vectors = 0;
@@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
for (i = 0; i < vdev->nr_vectors; i++) {
if (vdev->msi_vectors[i].use) {
vfio_msix_vector_release(&vdev->pdev, i);
+ msix_vector_unuse(&vdev->pdev, i);
}
}
@@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
static void vfio_disable_msi(VFIODevice *vdev)
{
- int i;
-
vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX);
-
- for (i = 0; i < vdev->nr_vectors; i++) {
- VFIOMSIVector *vector = &vdev->msi_vectors[i];
-
- if (!vector->use) {
- continue;
- }
-
- if (vector->virq >= 0) {
- kvm_irqchip_remove_irqfd_notifier(kvm_state,
- &vector->interrupt, vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
- } else {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
- }
-
- event_notifier_cleanup(&vector->interrupt);
- }
-
vfio_disable_msi_common(vdev);
DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev)
}
msg = msi_get_message(&vdev->pdev, i);
-
- if (msg.address != vector->msg.address ||
- msg.data != vector->msg.data) {
-
- DPRINTF("%s(%04x:%02x:%02x.%x) MSI vector %d changed\n",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, i);
-
- kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
- vector->msg = msg;
- }
+ vfio_update_kvm_msi_virq(vector, msg);
}
}
@@ -1053,10 +1082,10 @@ static void vfio_bar_write(void *opaque, hwaddr addr,
buf.byte = data;
break;
case 2:
- buf.word = cpu_to_le16(data);
+ buf.word = data;
break;
case 4:
- buf.dword = cpu_to_le32(data);
+ buf.dword = data;
break;
default:
hw_error("vfio: unsupported write size, %d bytes", size);
@@ -1113,10 +1142,10 @@ static uint64_t vfio_bar_read(void *opaque,
data = buf.byte;
break;
case 2:
- data = le16_to_cpu(buf.word);
+ data = buf.word;
break;
case 4:
- data = le32_to_cpu(buf.dword);
+ data = buf.dword;
break;
default:
hw_error("vfio: unsupported read size, %d bytes", size);
@@ -1143,7 +1172,7 @@ static uint64_t vfio_bar_read(void *opaque,
static const MemoryRegionOps vfio_bar_ops = {
.read = vfio_bar_read,
.write = vfio_bar_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
+ .endianness = DEVICE_NATIVE_ENDIAN,
};
static void vfio_pci_load_rom(VFIODevice *vdev)
@@ -1205,21 +1234,42 @@ static void vfio_pci_load_rom(VFIODevice *vdev)
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
{
VFIODevice *vdev = opaque;
- uint64_t val = ((uint64_t)1 << (size * 8)) - 1;
+ union {
+ uint8_t byte;
+ uint16_t word;
+ uint32_t dword;
+ uint64_t qword;
+ } buf;
+ uint64_t data = 0;
/* Load the ROM lazily when the guest tries to read it */
if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
vfio_pci_load_rom(vdev);
}
- memcpy(&val, vdev->rom + addr,
+ memcpy(&buf, vdev->rom + addr,
(addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
+ switch (size) {
+ case 1:
+ data = buf.byte;
+ break;
+ case 2:
+ data = buf.word;
+ break;
+ case 4:
+ data = buf.dword;
+ break;
+ default:
+ hw_error("vfio: unsupported read size, %d bytes", size);
+ break;
+ }
+
DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
__func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function, addr, size, val);
+ vdev->host.function, addr, size, data);
- return val;
+ return data;
}
static void vfio_rom_write(void *opaque, hwaddr addr,
@@ -1230,7 +1280,7 @@ static void vfio_rom_write(void *opaque, hwaddr addr,
static const MemoryRegionOps vfio_rom_ops = {
.read = vfio_rom_read,
.write = vfio_rom_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
+ .endianness = DEVICE_NATIVE_ENDIAN,
};
static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
@@ -4012,7 +4062,7 @@ static void vfio_err_notifier_handler(void *opaque)
__func__, vdev->host.domain, vdev->host.bus,
vdev->host.slot, vdev->host.function);
- vm_stop(RUN_STATE_IO_ERROR);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
}
/*
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 79502a090b..9e835d2cbb 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -45,6 +45,18 @@ struct nbd_reply {
#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
+/* New-style global flags. */
+#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+
+/* New-style client flags. */
+#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+
+/* Reply types. */
+#define NBD_REP_ACK (1) /* Data sending finished. */
+#define NBD_REP_SERVER (2) /* Export description. */
+#define NBD_REP_ERR_UNSUP ((1 << 31) | 1) /* Unknown option. */
+#define NBD_REP_ERR_INVALID ((1 << 31) | 3) /* Invalid length. */
+
#define NBD_CMD_MASK_COMMAND 0x0000ffff
#define NBD_CMD_FLAG_FUA (1 << 16)
diff --git a/nbd.c b/nbd.c
index e0d032c252..e7d1ceec43 100644
--- a/nbd.c
+++ b/nbd.c
@@ -56,7 +56,11 @@
__FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
} while(0)
-/* This is all part of the "official" NBD API */
+/* This is all part of the "official" NBD API.
+ *
+ * The most up-to-date documentation is available at:
+ * https://github.com/yoe/nbd/blob/master/doc/proto.txt
+ */
#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4)
#define NBD_REPLY_SIZE (4 + 4 + 8)
@@ -64,6 +68,7 @@
#define NBD_REPLY_MAGIC 0x67446698
#define NBD_OPTS_MAGIC 0x49484156454F5054LL
#define NBD_CLIENT_MAGIC 0x0000420281861253LL
+#define NBD_REP_MAGIC 0x3e889045565a9LL
#define NBD_SET_SOCK _IO(0xab, 0)
#define NBD_SET_BLKSIZE _IO(0xab, 1)
@@ -77,7 +82,9 @@
#define NBD_SET_TIMEOUT _IO(0xab, 9)
#define NBD_SET_FLAGS _IO(0xab, 10)
-#define NBD_OPT_EXPORT_NAME (1 << 0)
+#define NBD_OPT_EXPORT_NAME (1)
+#define NBD_OPT_ABORT (2)
+#define NBD_OPT_LIST (3)
/* Definitions for opaque data types */
@@ -215,59 +222,101 @@ static ssize_t write_sync(int fd, void *buffer, size_t size)
*/
-static int nbd_receive_options(NBDClient *client)
+static int nbd_send_rep(int csock, uint32_t type, uint32_t opt)
{
- int csock = client->sock;
- char name[256];
- uint32_t tmp, length;
uint64_t magic;
- int rc;
+ uint32_t len;
- /* Client sends:
- [ 0 .. 3] reserved (0)
- [ 4 .. 11] NBD_OPTS_MAGIC
- [12 .. 15] NBD_OPT_EXPORT_NAME
- [16 .. 19] length
- [20 .. xx] export name (length bytes)
- */
-
- rc = -EINVAL;
- if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("read failed");
- goto fail;
+ magic = cpu_to_be64(NBD_REP_MAGIC);
+ if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("write failed (rep magic)");
+ return -EINVAL;
}
- TRACE("Checking reserved");
- if (tmp != 0) {
- LOG("Bad reserved received");
- goto fail;
+ opt = cpu_to_be32(opt);
+ if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
+ LOG("write failed (rep opt)");
+ return -EINVAL;
}
-
- if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
- LOG("read failed");
- goto fail;
+ type = cpu_to_be32(type);
+ if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) {
+ LOG("write failed (rep type)");
+ return -EINVAL;
}
- TRACE("Checking reserved");
- if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
- LOG("Bad magic received");
- goto fail;
+ len = cpu_to_be32(0);
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (rep data length)");
+ return -EINVAL;
}
+ return 0;
+}
- if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("read failed");
- goto fail;
+static int nbd_send_rep_list(int csock, NBDExport *exp)
+{
+ uint64_t magic, name_len;
+ uint32_t opt, type, len;
+
+ name_len = strlen(exp->name);
+ magic = cpu_to_be64(NBD_REP_MAGIC);
+ if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("write failed (magic)");
+ return -EINVAL;
+ }
+ opt = cpu_to_be32(NBD_OPT_LIST);
+ if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
+ LOG("write failed (opt)");
+ return -EINVAL;
}
- TRACE("Checking option");
- if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) {
- LOG("Bad option received");
- goto fail;
+ type = cpu_to_be32(NBD_REP_SERVER);
+ if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) {
+ LOG("write failed (reply type)");
+ return -EINVAL;
}
+ len = cpu_to_be32(name_len + sizeof(len));
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (length)");
+ return -EINVAL;
+ }
+ len = cpu_to_be32(name_len);
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (length)");
+ return -EINVAL;
+ }
+ if (write_sync(csock, exp->name, name_len) != name_len) {
+ LOG("write failed (buffer)");
+ return -EINVAL;
+ }
+ return 0;
+}
- if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
- LOG("read failed");
- goto fail;
+static int nbd_handle_list(NBDClient *client, uint32_t length)
+{
+ int csock;
+ NBDExport *exp;
+
+ csock = client->sock;
+ if (length) {
+ return nbd_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST);
}
+
+ /* For each export, send an NBD_REP_SERVER reply. */
+ QTAILQ_FOREACH(exp, &exports, next) {
+ if (nbd_send_rep_list(csock, exp)) {
+ return -EINVAL;
+ }
+ }
+ /* Finish with an NBD_REP_ACK. */
+ return nbd_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST);
+}
+
+static int nbd_handle_export_name(NBDClient *client, uint32_t length)
+{
+ int rc = -EINVAL, csock = client->sock;
+ char name[256];
+
+ /* Client sends:
+ [20 .. xx] export name (length bytes)
+ */
TRACE("Checking length");
- length = be32_to_cpu(length);
if (length > 255) {
LOG("Bad length received");
goto fail;
@@ -286,13 +335,81 @@ static int nbd_receive_options(NBDClient *client)
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
-
- TRACE("Option negotiation succeeded.");
rc = 0;
fail:
return rc;
}
+static int nbd_receive_options(NBDClient *client)
+{
+ while (1) {
+ int csock = client->sock;
+ uint32_t tmp, length;
+ uint64_t magic;
+
+ /* Client sends:
+ [ 0 .. 3] client flags
+ [ 4 .. 11] NBD_OPTS_MAGIC
+ [12 .. 15] NBD option
+ [16 .. 19] length
+ ... Rest of request
+ */
+
+ if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+ LOG("read failed");
+ return -EINVAL;
+ }
+ TRACE("Checking client flags");
+ tmp = be32_to_cpu(tmp);
+ if (tmp != 0 && tmp != NBD_FLAG_C_FIXED_NEWSTYLE) {
+ LOG("Bad client flags received");
+ return -EINVAL;
+ }
+
+ if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("read failed");
+ return -EINVAL;
+ }
+ TRACE("Checking opts magic");
+ if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
+ LOG("Bad magic received");
+ return -EINVAL;
+ }
+
+ if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+ LOG("read failed");
+ return -EINVAL;
+ }
+
+ if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
+ LOG("read failed");
+ return -EINVAL;
+ }
+ length = be32_to_cpu(length);
+
+ TRACE("Checking option");
+ switch (be32_to_cpu(tmp)) {
+ case NBD_OPT_LIST:
+ if (nbd_handle_list(client, length) < 0) {
+ return 1;
+ }
+ break;
+
+ case NBD_OPT_ABORT:
+ return -EINVAL;
+
+ case NBD_OPT_EXPORT_NAME:
+ return nbd_handle_export_name(client, length);
+
+ default:
+ tmp = be32_to_cpu(tmp);
+ LOG("Unsupported option 0x%x", tmp);
+ nbd_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp);
+ return -EINVAL;
+ }
+ }
+}
+
static int nbd_send_negotiate(NBDClient *client)
{
int csock = client->sock;
@@ -333,6 +450,7 @@ static int nbd_send_negotiate(NBDClient *client)
cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
} else {
cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
+ cpu_to_be16w((uint16_t *)(buf + 16), NBD_FLAG_FIXED_NEWSTYLE);
}
if (client->exp) {
@@ -346,7 +464,7 @@ static int nbd_send_negotiate(NBDClient *client)
goto fail;
}
rc = nbd_receive_options(client);
- if (rc < 0) {
+ if (rc != 0) {
LOG("option negotiation failed");
goto fail;
}
@@ -1001,6 +1119,7 @@ static void nbd_trip(void *opaque)
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
+ uint32_t command;
TRACE("Reading request.");
if (client->closing) {
@@ -1023,8 +1142,8 @@ static void nbd_trip(void *opaque)
reply.error = -ret;
goto error_reply;
}
-
- if ((request.from + request.len) > exp->size) {
+ command = request.type & NBD_CMD_MASK_COMMAND;
+ if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
", Offset: %" PRIu64 "\n",
request.from, request.len,
@@ -1033,7 +1152,7 @@ static void nbd_trip(void *opaque)
goto invalid_request;
}
- switch (request.type & NBD_CMD_MASK_COMMAND) {
+ switch (command) {
case NBD_CMD_READ:
TRACE("Request type is READ");
@@ -1173,7 +1292,7 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock,
client->refcount = 1;
client->exp = exp;
client->sock = csock;
- if (nbd_send_negotiate(client) < 0) {
+ if (nbd_send_negotiate(client)) {
g_free(client);
return NULL;
}
diff --git a/qemu-nbd.c b/qemu-nbd.c
index ba6043680a..626e5844f9 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -372,6 +372,7 @@ static void nbd_accept(void *opaque)
if (nbd_client_new(exp, fd, nbd_client_closed)) {
nb_fds++;
} else {
+ shutdown(fd, 2);
close(fd);
}
}