aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2017-10-31 13:12:21 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2017-10-31 13:12:21 +0000
commit 92c7ec5cd4d15c76218703f7bd3ca75bd46353b7 (patch)
tree 0e17111be31efbc5ab657c4736b3e8556b4cf191
parent d2b9d71432687eef947f243a57409381151701d5 (diff)
parent f140e3000371e67ff4e00df3213e2d576d9c91be (diff)
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-10-30' into staging
nbd patches for 2017-10-30 Vladimir Sementsov-Ogievskiy (some patches co-authored by Eric Blake): 00/12 nbd minimal structured read # gpg: Signature made Mon 30 Oct 2017 20:54:54 GMT # gpg: using RSA key 0xA7A16B4A2527436A # gpg: Good signature from "Eric Blake <eblake@redhat.com>" # gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" # gpg: aka "[jpeg image of size 6874]" # Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A * remotes/ericb/tags/pull-nbd-2017-10-30: nbd: Minimal structured read for client nbd: Move nbd_read() to common header nbd/client: prepare nbd_receive_reply for structured reply nbd/client: refactor nbd_receive_starttls nbd/server: Include human-readable message in structured errors nbd: Minimal structured read for server nbd/server: Refactor zero-length option check nbd/server: Simplify nbd_negotiate_options loop nbd/server: Report error for write to read-only export nbd: Expose constants and structs for structured read nbd: Move nbd_errno_to_system_errno() to public header nbd: Include error names in trace messages Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- block/nbd-client.c 492
-rw-r--r-- block/nbd-client.h 1
-rw-r--r-- include/block/nbd.h 106
-rw-r--r-- nbd/client.c 203
-rw-r--r-- nbd/common.c 84
-rw-r--r-- nbd/nbd-internal.h 23
-rw-r--r-- nbd/server.c 210
-rw-r--r-- nbd/trace-events 15
-rw-r--r-- tests/qemu-iotests/083.out 15
9 files changed, 962 insertions, 187 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c
index c0683c3c83..b44d4d4a01 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -92,7 +92,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS ||
!s->requests[i].coroutine ||
- !s->requests[i].receiving) {
+ !s->requests[i].receiving ||
+ (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
+ {
break;
}
@@ -139,6 +141,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
assert(i < MAX_NBD_REQUESTS);
s->requests[i].coroutine = qemu_coroutine_self();
+ s->requests[i].offset = request->from;
s->requests[i].receiving = false;
request->handle = INDEX_TO_HANDLE(s, i);
@@ -179,75 +182,489 @@ err:
return rc;
}
-static int nbd_co_receive_reply(NBDClientSession *s,
- uint64_t handle,
- QEMUIOVector *qiov)
+/* Fetch the big-endian 16-bit field at *payload and step the cursor past it. */
+static inline uint16_t payload_advance16(uint8_t **payload)
+{
+    uint8_t *field = *payload;
+
+    *payload = field + 2;
+    return lduw_be_p(field);
+}
+
+/* Fetch the big-endian 32-bit field at *payload and step the cursor past it. */
+static inline uint32_t payload_advance32(uint8_t **payload)
+{
+    uint8_t *field = *payload;
+
+    *payload = field + 4;
+    return ldl_be_p(field);
+}
+
+/* Fetch the big-endian 64-bit field at *payload and step the cursor past it. */
+static inline uint64_t payload_advance64(uint8_t **payload)
+{
+    uint8_t *field = *payload;
+
+    *payload = field + 8;
+    return ldq_be_p(field);
+}
+
+/* nbd_parse_offset_hole_payload
+ * Validate the payload of an NBD_REPLY_TYPE_OFFSET_HOLE chunk (a 64-bit
+ * offset followed by a 32-bit hole size) and zero the matching part of @qiov.
+ * @orig_offset is the offset the original request started at.
+ * Returns 0 on success, -EINVAL with @errp set when the payload is malformed
+ * or the hole does not fit inside the requested region.
+ */
+static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_offset,
+                                         QEMUIOVector *qiov, Error **errp)
+{
+    uint64_t offset;
+    uint32_t hole_size;
+    bool inside_request;
+
+    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_OFFSET_HOLE");
+        return -EINVAL;
+    }
+
+    offset = payload_advance64(&payload);
+    hole_size = payload_advance32(&payload);
+
+    /* The hole must lie entirely within [orig_offset, orig_offset + size) */
+    inside_request = offset >= orig_offset && hole_size <= qiov->size &&
+                     offset <= orig_offset + qiov->size - hole_size;
+    if (!inside_request) {
+        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+                         " region");
+        return -EINVAL;
+    }
+
+    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
+
+    return 0;
+}
+
+/* nbd_parse_error_payload
+ * Parse the payload of a structured error chunk: a 32-bit NBD error code
+ * followed by a 16-bit message length (the message itself is not used yet).
+ * On success returns 0 and stores the negated system errno in @request_ret;
+ * @errp is left untouched in that case.
+ * On a malformed payload returns -EINVAL with @errp set.
+ */
+static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
+                                   uint8_t *payload, int *request_ret,
+                                   Error **errp)
+{
+    uint32_t error;
+    uint16_t message_size;
+
+    assert(chunk->type & (1 << 15));
+
+    if (chunk->length < sizeof(error) + sizeof(message_size)) {
+        error_setg(errp,
+                   "Protocol error: invalid payload for structured error");
+        return -EINVAL;
+    }
+
+    error = nbd_errno_to_system_errno(payload_advance32(&payload));
+    if (error == 0) {
+        /* An error chunk that reports success makes no sense */
+        error_setg(errp, "Protocol error: server sent structured error chunk"
+                         " with error = 0");
+        return -EINVAL;
+    }
+
+    *request_ret = -error;
+    message_size = payload_advance16(&payload);
+
+    /* The announced message must fit in what is left of the payload */
+    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
+        error_setg(errp, "Protocol error: server sent structured error chunk"
+                         " with incorrect message size");
+        return -EINVAL;
+    }
+
+    /* TODO: Add a trace point to mention the server complaint */
+
+    /* TODO handle ERROR_OFFSET */
+
+    return 0;
+}
+
+/* nbd_co_receive_offset_data_payload
+ * Read the payload of the NBD_REPLY_TYPE_OFFSET_DATA chunk currently held in
+ * s->reply: a 64-bit offset followed by data, which is read directly into
+ * the matching part of @qiov. @orig_offset is the offset of the original
+ * request.
+ * Returns 0 on success, -EINVAL with @errp set when the chunk is malformed or
+ * exceeds the requested region, and -EIO when reading from the channel fails.
+ */
+static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
+ uint64_t orig_offset,
+ QEMUIOVector *qiov, Error **errp)
+{
+ QEMUIOVector sub_qiov;
+ uint64_t offset;
+ size_t data_size;
+ int ret;
+ NBDStructuredReplyChunk *chunk = &s->reply.structured;
+
+ assert(nbd_reply_is_structured(&s->reply));
+
+ /* The payload must at least contain the offset field */
+ if (chunk->length < sizeof(offset)) {
+ error_setg(errp, "Protocol error: invalid payload for "
+ "NBD_REPLY_TYPE_OFFSET_DATA");
+ return -EINVAL;
+ }
+
+ if (nbd_read(s->ioc, &offset, sizeof(offset), errp) < 0) {
+ return -EIO;
+ }
+ be64_to_cpus(&offset);
+
+ /* Everything after the offset field is data; it must fit in the request */
+ data_size = chunk->length - sizeof(offset);
+ if (offset < orig_offset || data_size > qiov->size ||
+ offset > orig_offset + qiov->size - data_size) {
+ error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+ " region");
+ return -EINVAL;
+ }
+
+ /* Read straight into the sub-range of @qiov that this chunk covers */
+ qemu_iovec_init(&sub_qiov, qiov->niov);
+ qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
+ ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
+ qemu_iovec_destroy(&sub_qiov);
+
+ return ret < 0 ? -EIO : 0;
+}
+
+/* Largest structured payload, in bytes, that
+ * nbd_co_receive_structured_payload is willing to allocate a buffer for. */
+#define NBD_MAX_MALLOC_PAYLOAD 1000
+/* nbd_co_receive_structured_payload
+ * Read the payload of the structured chunk currently held in s->reply into
+ * a newly allocated buffer and store it in *@payload; the caller frees it
+ * with g_free().
+ * A zero-length payload succeeds without touching *@payload.
+ * Returns 0 on success; -EINVAL with @errp set if there is a payload but
+ * @payload is NULL or the payload is larger than NBD_MAX_MALLOC_PAYLOAD;
+ * the nbd_read() error if reading fails (buffer freed, *@payload = NULL).
+ */
+static coroutine_fn int nbd_co_receive_structured_payload(
+ NBDClientSession *s, void **payload, Error **errp)
+{
+ int ret;
+ uint32_t len;
+
+ assert(nbd_reply_is_structured(&s->reply));
+
+ len = s->reply.structured.length;
+
+ if (len == 0) {
+ return 0;
+ }
+
+ /* The caller did not expect any payload for this chunk */
+ if (payload == NULL) {
+ error_setg(errp, "Unexpected structured payload");
+ return -EINVAL;
+ }
+
+ /* Do not let the server dictate an arbitrarily large allocation */
+ if (len > NBD_MAX_MALLOC_PAYLOAD) {
+ error_setg(errp, "Payload too large");
+ return -EINVAL;
+ }
+
+ *payload = g_new(char, len);
+ ret = nbd_read(s->ioc, *payload, len, errp);
+ if (ret < 0) {
+ g_free(*payload);
+ *payload = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+/* nbd_co_do_receive_one_chunk
+ * for simple reply:
+ * set request_ret to received reply error
+ * if qiov is not NULL: read payload to @qiov
+ * for structured reply chunk:
+ * if error chunk: read payload, set @request_ret, do not set @payload
+ * else if offset_data chunk: read payload data to @qiov, do not set @payload
+ * else: read payload to @payload
+ *
+ * If function fails, @errp contains corresponding error message, and the
+ * connection with the server is suspect. If it returns 0, then the
+ * transaction succeeded (although @request_ret may be a negative errno
+ * corresponding to the server's error reply), and errp is unchanged.
+ */
+static coroutine_fn int nbd_co_do_receive_one_chunk(
+ NBDClientSession *s, uint64_t handle, bool only_structured,
+ int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
{
int ret;
int i = HANDLE_TO_INDEX(s, handle);
+ void *local_payload = NULL;
+ NBDStructuredReplyChunk *chunk;
+
+ if (payload) {
+ *payload = NULL;
+ }
+ *request_ret = 0;
/* Wait until we're woken up by nbd_read_reply_entry. */
s->requests[i].receiving = true;
qemu_coroutine_yield();
s->requests[i].receiving = false;
if (!s->ioc || s->quit) {
- ret = -EIO;
- } else {
- assert(s->reply.handle == handle);
- ret = -s->reply.error;
- if (qiov && s->reply.error == 0) {
- if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
- NULL) < 0) {
- ret = -EIO;
- s->quit = true;
- }
+ error_setg(errp, "Connection closed");
+ return -EIO;
+ }
+
+ assert(s->reply.handle == handle);
+
+ if (nbd_reply_is_simple(&s->reply)) {
+ if (only_structured) {
+ error_setg(errp, "Protocol error: simple reply when structured "
+ "reply chunk was expected");
+ return -EINVAL;
}
- /* Tell the read handler to read another header. */
- s->reply.handle = 0;
+ *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
+ if (*request_ret < 0 || !qiov) {
+ return 0;
+ }
+
+ return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
+ errp) < 0 ? -EIO : 0;
+ }
+
+ /* handle structured reply chunk */
+ assert(s->info.structured_reply);
+ chunk = &s->reply.structured;
+
+ if (chunk->type == NBD_REPLY_TYPE_NONE) {
+ if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
+ error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
+ " NBD_REPLY_FLAG_DONE flag set");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
+ if (!qiov) {
+ error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
+ return -EINVAL;
+ }
+
+ return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
+ qiov, errp);
+ }
+
+ if (nbd_reply_type_is_error(chunk->type)) {
+ payload = &local_payload;
+ }
+
+ ret = nbd_co_receive_structured_payload(s, payload, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (nbd_reply_type_is_error(chunk->type)) {
+ ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
+ g_free(local_payload);
+ return ret;
}
- s->requests[i].coroutine = NULL;
+ return 0;
+}
+
+/* nbd_co_receive_one_chunk
+ * Receive one reply (simple reply or structured chunk) for @handle through
+ * nbd_co_do_receive_one_chunk(), then wake read_reply_co if it exists.
+ * On a fatal failure s->quit is set and the negative error is returned with
+ * @errp set. Otherwise the reply header is copied to @reply (if not NULL),
+ * s->reply.handle is cleared, and the return value is the request's own
+ * result: 0 or the negative errno reported by the server.
+ */
+static coroutine_fn int nbd_co_receive_one_chunk(
+ NBDClientSession *s, uint64_t handle, bool only_structured,
+ QEMUIOVector *qiov, NBDReply *reply, void **payload, Error **errp)
+{
+ int request_ret;
+ int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
+ &request_ret, qiov, payload, errp);
+
+ if (ret < 0) {
+ /* Fatal: the connection can no longer be trusted */
+ s->quit = true;
+ } else {
+ /* For assert at loop start in nbd_read_reply_entry */
+ if (reply) {
+ *reply = s->reply;
+ }
+ s->reply.handle = 0;
+ ret = request_ret;
+ }
- /* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
+ return ret;
+}
+
+/* State carried between iterations of NBD_FOREACH_REPLY_CHUNK */
+typedef struct NBDReplyChunkIter {
+ int ret; /* 0, or the negative errno recorded by nbd_iter_error() */
+ Error *err; /* error object recorded together with @ret, may be NULL */
+ /* @done: a chunk with NBD_REPLY_FLAG_DONE was seen, next receive stops.
+ * @only_structured: a simple reply is no longer acceptable. */
+ bool done, only_structured;
+} NBDReplyChunkIter;
+
+/* nbd_iter_error
+ * Record failure @ret (which must be negative) in @iter.
+ * The first recorded error is kept; a later one replaces it only when
+ * @fatal is set. *@local_err is consumed either way and reset to NULL.
+ */
+static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
+                           int ret, Error **local_err)
+{
+    bool have_previous = iter->ret != 0;
+
+    assert(ret < 0);
+
+    if (!fatal && have_previous) {
+        /* Keep the earlier error and drop the new one */
+        error_free(*local_err);
+    } else {
+        if (have_previous) {
+            error_free(iter->err);
+            iter->err = NULL;
+        }
+        iter->ret = ret;
+        error_propagate(&iter->err, *local_err);
+    }
+
+    *local_err = NULL;
+}
+
+/* NBD_FOREACH_REPLY_CHUNK
+ * Loop over the reply chunks of request @handle. @iter is reset with
+ * only_structured = @structured, then nbd_reply_chunk_iter_receive() is
+ * called before every pass and the loop body runs while it returns true.
+ * After the loop, @iter.ret and @iter.err hold the recorded result.
+ */
+#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
+ qiov, reply, payload) \
+ for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
+ nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
+
+/* nbd_reply_chunk_iter_receive
+ * Loop condition of NBD_FOREACH_REPLY_CHUNK: receive the next reply for
+ * @handle and record any error in @iter.
+ * Returns true when a structured chunk other than NBD_REPLY_TYPE_NONE was
+ * received and the loop body should run for it.
+ * Returns false when iteration is over (connection closed, previous chunk
+ * was the last one, simple reply, or NBD_REPLY_TYPE_NONE); in that case the
+ * request slot's coroutine is cleared, s->in_flight is decremented and the
+ * next waiter on s->free_sema is woken.
+ */
+static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
+ NBDReplyChunkIter *iter,
+ uint64_t handle,
+ QEMUIOVector *qiov, NBDReply *reply,
+ void **payload)
+{
+ int ret;
+ NBDReply local_reply;
+ NBDStructuredReplyChunk *chunk;
+ Error *local_err = NULL;
+ if (s->quit) {
+ error_setg(&local_err, "Connection closed");
+ nbd_iter_error(iter, true, -EIO, &local_err);
+ goto break_loop;
+ }
+
+ if (iter->done) {
+ /* Previous iteration was last. */
+ goto break_loop;
+ }
+
+ /* The chunk header is needed below even if the caller does not want it */
+ if (reply == NULL) {
+ reply = &local_reply;
+ }
+
+ ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
+ qiov, reply, payload, &local_err);
+ if (ret < 0) {
+ /* If it is a fatal error s->quit is set by nbd_co_receive_one_chunk */
+ nbd_iter_error(iter, s->quit, ret, &local_err);
+ }
+
+ /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
+ /* NOTE(review): this inspects s->reply rather than the @reply copy filled
+ * in by nbd_co_receive_one_chunk - confirm s->reply cannot have been
+ * overwritten by the next header at this point. */
+ if (nbd_reply_is_simple(&s->reply) || s->quit) {
+ goto break_loop;
+ }
+
+ chunk = &reply->structured;
+ /* Once one structured chunk arrived, a simple reply may not follow */
+ iter->only_structured = true;
+
+ if (chunk->type == NBD_REPLY_TYPE_NONE) {
+ /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
+ assert(chunk->flags & NBD_REPLY_FLAG_DONE);
+ goto break_loop;
+ }
+
+ if (chunk->flags & NBD_REPLY_FLAG_DONE) {
+ /* This iteration is last. */
+ iter->done = true;
+ }
+
+ /* Execute the loop body */
+ return true;
+
+break_loop:
+ s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
+
qemu_co_mutex_lock(&s->send_mutex);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
qemu_co_mutex_unlock(&s->send_mutex);
- return ret;
+ return false;
}
-static int nbd_co_request(BlockDriverState *bs,
- NBDRequest *request,
- QEMUIOVector *qiov)
+/* nbd_co_receive_return_code
+ * Consume the complete reply for @handle when no payload is expected
+ * (qiov, reply and payload are all passed as NULL to the iterator).
+ * Returns the result recorded by the iterator (0 or a negative errno) and
+ * propagates the recorded error object, if any, to @errp.
+ */
+static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
+ Error **errp)
+{
+ NBDReplyChunkIter iter;
+
+ NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
+ /* nbd_reply_chunk_iter_receive does all the work */
+ }
+
+ error_propagate(errp, iter.err);
+ return iter.ret;
+}
+
+/* nbd_co_receive_cmdread_reply
+ * Consume the complete reply to an NBD_CMD_READ request for @handle, filling
+ * @qiov. @offset is the offset of the original request. When structured
+ * replies were negotiated, a simple reply is rejected from the start.
+ * Hole chunks zero the matching part of @qiov; any chunk type other than
+ * data, hole or error is treated as a fatal protocol error (s->quit is set).
+ * Returns the result recorded by the iterator (0 or a negative errno) and
+ * propagates the recorded error object, if any, to @errp.
+ */
+static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
+ uint64_t offset, QEMUIOVector *qiov,
+ Error **errp)
+{
+ NBDReplyChunkIter iter;
+ NBDReply reply;
+ void *payload = NULL;
+ Error *local_err = NULL;
+
+ NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+ qiov, &reply, &payload)
+ {
+ int ret;
+ NBDStructuredReplyChunk *chunk = &reply.structured;
+
+ assert(nbd_reply_is_structured(&reply));
+
+ switch (chunk->type) {
+ case NBD_REPLY_TYPE_OFFSET_DATA:
+ /* special cased in nbd_co_receive_one_chunk, data is already
+ * in qiov */
+ break;
+ case NBD_REPLY_TYPE_OFFSET_HOLE:
+ ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
+ offset, qiov, &local_err);
+ if (ret < 0) {
+ s->quit = true;
+ nbd_iter_error(&iter, true, ret, &local_err);
+ }
+ break;
+ default:
+ /* Error chunks were already handled while receiving the chunk */
+ if (!nbd_reply_type_is_error(chunk->type)) {
+ /* not allowed reply type */
+ s->quit = true;
+ error_setg(&local_err,
+ "Unexpected reply type: %d (%s) for CMD_READ",
+ chunk->type, nbd_reply_type_lookup(chunk->type));
+ nbd_iter_error(&iter, true, -EINVAL, &local_err);
+ }
+ }
+
+ /* The payload buffer is owned by this loop; release it per chunk */
+ g_free(payload);
+ payload = NULL;
+ }
+
+ error_propagate(errp, iter.err);
+ return iter.ret;
+}
+
+/* nbd_co_request
+ * Send @request (any command except NBD_CMD_READ) and wait for its result.
+ * @write_qiov carries the data for NBD_CMD_WRITE and must be NULL otherwise.
+ * Returns 0 or a negative errno; an error object produced while receiving
+ * the reply is reported with error_report_err().
+ */
+static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
+ QEMUIOVector *write_qiov)
{
- NBDClientSession *client = nbd_get_client_session(bs);
int ret;
+ Error *local_err = NULL;
+ NBDClientSession *client = nbd_get_client_session(bs);
- if (qiov) {
- assert(request->type == NBD_CMD_WRITE || request->type == NBD_CMD_READ);
- assert(request->len == iov_size(qiov->iov, qiov->niov));
+ assert(request->type != NBD_CMD_READ);
+ if (write_qiov) {
+ assert(request->type == NBD_CMD_WRITE);
+ assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
} else {
- assert(request->type != NBD_CMD_WRITE && request->type != NBD_CMD_READ);
+ assert(request->type != NBD_CMD_WRITE);
}
- ret = nbd_co_send_request(bs, request,
- request->type == NBD_CMD_WRITE ? qiov : NULL);
+ ret = nbd_co_send_request(bs, request, write_qiov);
if (ret < 0) {
return ret;
}
- return nbd_co_receive_reply(client, request->handle,
- request->type == NBD_CMD_READ ? qiov : NULL);
+ ret = nbd_co_receive_return_code(client, request->handle, &local_err);
+ /* A server-reported errno may come without an error object */
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return ret;
}
+/* nbd_client_co_preadv
+ * Send an NBD_CMD_READ request and receive its reply into @qiov.
+ * Returns 0 or a negative errno; an error object produced while receiving
+ * the reply is reported with error_report_err().
+ */
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
+ int ret;
+ Error *local_err = NULL;
+ NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_READ,
.from = offset,
@@ -257,7 +674,17 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
assert(bytes <= NBD_MAX_BUFFER_SIZE);
assert(!flags);
- return nbd_co_request(bs, &request, qiov);
+ ret = nbd_co_send_request(bs, &request, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
+ &local_err);
+ /* A plain server error reply yields ret < 0 without an error object, so
+ * test the object itself (as nbd_co_request does) before reporting it. */
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return ret;
}
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -379,6 +806,7 @@ int nbd_client_init(BlockDriverState *bs,
qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
client->info.request_sizes = true;
+ client->info.structured_reply = true;
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
tlscreds, hostname,
&client->ioc, &client->info, errp);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index b435754b82..612c4c21a0 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -19,6 +19,7 @@
typedef struct {
Coroutine *coroutine;
+ uint64_t offset; /* original offset of the request */
bool receiving; /* waiting for read_reply_co? */
} NBDClientRequest;
diff --git a/include/block/nbd.h b/include/block/nbd.h
index a6df5ce8b5..92d1723d7c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -57,18 +57,48 @@ struct NBDRequest {
};
typedef struct NBDRequest NBDRequest;
-struct NBDReply {
- uint64_t handle;
- uint32_t error;
-};
-typedef struct NBDReply NBDReply;
-
typedef struct NBDSimpleReply {
uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC */
uint32_t error;
uint64_t handle;
} QEMU_PACKED NBDSimpleReply;
+/* Header of all structured replies */
+typedef struct NBDStructuredReplyChunk {
+ uint32_t magic; /* NBD_STRUCTURED_REPLY_MAGIC */
+ uint16_t flags; /* combination of NBD_REPLY_FLAG_* */
+ uint16_t type; /* NBD_REPLY_TYPE_* */
+ uint64_t handle; /* request handle */
+ uint32_t length; /* length of payload */
+} QEMU_PACKED NBDStructuredReplyChunk;
+
+typedef union NBDReply {
+ NBDSimpleReply simple;
+ NBDStructuredReplyChunk structured;
+ struct {
+ /* @magic and @handle fields have the same offset and size both in
+ * simple reply and structured reply chunk, so let them be accessible
+ * without ".simple." or ".structured." specification
+ */
+ uint32_t magic;
+ uint32_t _skip;
+ uint64_t handle;
+ } QEMU_PACKED;
+} NBDReply;
+
+/* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
+typedef struct NBDStructuredRead {
+ NBDStructuredReplyChunk h;
+ uint64_t offset;
+} QEMU_PACKED NBDStructuredRead;
+
+/* Header of all NBD_REPLY_TYPE_ERROR* errors */
+typedef struct NBDStructuredError {
+ NBDStructuredReplyChunk h;
+ uint32_t error;
+ uint16_t message_length;
+} QEMU_PACKED NBDStructuredError;
+
/* Transmission (export) flags: sent from server to client during handshake,
but describe what will happen during transmission */
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
@@ -79,6 +109,7 @@ typedef struct NBDSimpleReply {
rotational media */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
+#define NBD_FLAG_SEND_DF (1 << 7) /* Send DF (Do not Fragment) */
/* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
@@ -125,6 +156,7 @@ typedef struct NBDSimpleReply {
/* Request flags, sent from client to server during transmission phase */
#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */
#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */
/* Supported request types */
enum {
@@ -149,10 +181,49 @@ enum {
* aren't overflowing some other buffer. */
#define NBD_MAX_NAME_SIZE 256
+/* Two types of reply structures */
+#define NBD_SIMPLE_REPLY_MAGIC 0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef
+
+/* Structured reply flags */
+#define NBD_REPLY_FLAG_DONE (1 << 0) /* This reply-chunk is last */
+
+/* Structured reply types */
+#define NBD_REPLY_ERR(value) ((1 << 15) | (value))
+
+#define NBD_REPLY_TYPE_NONE 0
+#define NBD_REPLY_TYPE_OFFSET_DATA 1
+#define NBD_REPLY_TYPE_OFFSET_HOLE 2
+#define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_ERR(2)
+
+/* Structured reply types with bit 15 set denote error chunks. */
+static inline bool nbd_reply_type_is_error(int type)
+{
+    return (type & (1 << 15)) != 0;
+}
+
+/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
+ * but only a limited set of errno values is specified in the protocol.
+ * Everything else is squashed to EINVAL.
+ */
+#define NBD_SUCCESS 0
+#define NBD_EPERM 1
+#define NBD_EIO 5
+#define NBD_ENOMEM 12
+#define NBD_EINVAL 22
+#define NBD_ENOSPC 28
+#define NBD_EOVERFLOW 75
+#define NBD_ESHUTDOWN 108
+
/* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
struct NBDExportInfo {
/* Set by client before nbd_receive_negotiate() */
bool request_sizes;
+
+ /* In-out fields, set by client before nbd_receive_negotiate() and
+ * updated by server results during nbd_receive_negotiate() */
+ bool structured_reply;
+
/* Set by server results during nbd_receive_negotiate() */
uint64_t size;
uint16_t flags;
@@ -172,6 +243,7 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
int nbd_client(int fd);
int nbd_disconnect(int fd);
+int nbd_errno_to_system_errno(int err);
typedef struct NBDExport NBDExport;
typedef struct NBDClient NBDClient;
@@ -202,4 +274,26 @@ void nbd_client_put(NBDClient *client);
void nbd_server_start(SocketAddress *addr, const char *tls_creds,
Error **errp);
+
+/* nbd_read
+ * Reads @size bytes from @ioc into @buffer.
+ * Returns 0 on success, -EIO if qio_channel_read_all() reports a failure.
+ */
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+                           Error **errp)
+{
+    if (qio_channel_read_all(ioc, buffer, size, errp) < 0) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/* True if @reply carries the simple reply magic (magic already in host
+ * byte order). */
+static inline bool nbd_reply_is_simple(NBDReply *reply)
+{
+ return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
+}
+
+/* True if @reply carries the structured reply chunk magic (magic already in
+ * host byte order). */
+static inline bool nbd_reply_is_structured(NBDReply *reply)
+{
+ return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
+}
+
+const char *nbd_reply_type_lookup(uint16_t type);
+
#endif
diff --git a/nbd/client.c b/nbd/client.c
index cd5a2c80ac..3d680e63e1 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -22,38 +22,6 @@
#include "trace.h"
#include "nbd-internal.h"
-static int nbd_errno_to_system_errno(int err)
-{
- int ret;
- switch (err) {
- case NBD_SUCCESS:
- ret = 0;
- break;
- case NBD_EPERM:
- ret = EPERM;
- break;
- case NBD_EIO:
- ret = EIO;
- break;
- case NBD_ENOMEM:
- ret = ENOMEM;
- break;
- case NBD_ENOSPC:
- ret = ENOSPC;
- break;
- case NBD_ESHUTDOWN:
- ret = ESHUTDOWN;
- break;
- default:
- trace_nbd_unknown_error(err);
- /* fallthrough */
- case NBD_EINVAL:
- ret = EINVAL;
- break;
- }
- return ret;
-}
-
/* Definitions for opaque data types */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -540,35 +508,61 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
}
}
-static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
- QCryptoTLSCreds *tlscreds,
- const char *hostname, Error **errp)
+/* nbd_request_simple_option: Send an option request, and parse the reply
+ * return 1 for successful negotiation,
+ * 0 if operation is unsupported,
+ * -1 with errp set for any other error
+ */
+static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp)
{
nbd_opt_reply reply;
- QIOChannelTLS *tioc;
- struct NBDTLSHandshakeData data = { 0 };
+ int error;
- trace_nbd_receive_starttls_request();
- if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
- return NULL;
+ if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
+ return -1;
}
- trace_nbd_receive_starttls_reply();
- if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
- return NULL;
+ if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+ return -1;
+ }
+ error = nbd_handle_reply_err(ioc, &reply, errp);
+ if (error <= 0) {
+ return error;
}
if (reply.type != NBD_REP_ACK) {
- error_setg(errp, "Server rejected request to start TLS %" PRIx32,
- reply.type);
+ error_setg(errp, "Server answered option %d (%s) with unexpected "
+ "reply %" PRIx32 " (%s)", opt, nbd_opt_lookup(opt),
+ reply.type, nbd_rep_lookup(reply.type));
nbd_send_opt_abort(ioc);
- return NULL;
+ return -1;
}
if (reply.length != 0) {
- error_setg(errp, "Start TLS response was not zero %" PRIu32,
+ error_setg(errp, "Option %d ('%s') response length is %" PRIu32
+ " (it should be zero)", opt, nbd_opt_lookup(opt),
reply.length);
nbd_send_opt_abort(ioc);
+ return -1;
+ }
+
+ return 1;
+}
+
+static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname, Error **errp)
+{
+ int ret;
+ QIOChannelTLS *tioc;
+ struct NBDTLSHandshakeData data = { 0 };
+
+ ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp);
+ if (ret <= 0) {
+ if (ret == 0) {
+ error_setg(errp, "Server don't support STARTTLS option");
+ nbd_send_opt_abort(ioc);
+ }
return NULL;
}
@@ -608,9 +602,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
uint64_t magic;
int rc;
bool zeroes = true;
+ bool structured_reply = info->structured_reply;
trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
+ info->structured_reply = false;
rc = -EINVAL;
if (outioc) {
@@ -691,6 +687,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
if (fixedNewStyle) {
int result;
+ if (structured_reply) {
+ result = nbd_request_simple_option(ioc,
+ NBD_OPT_STRUCTURED_REPLY,
+ errp);
+ if (result < 0) {
+ goto fail;
+ }
+ info->structured_reply = result == 1;
+ }
+
/* Try NBD_OPT_GO first - if it works, we are done (it
* also gives us a good message if the server requires
* TLS). If it is not available, fall back to
@@ -914,6 +920,57 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
return nbd_write(ioc, buf, sizeof(buf), NULL);
}
+/* nbd_receive_simple_reply
+ * Read the rest of a simple reply header; the magic field must already have
+ * been read and converted. The error and handle fields are converted to
+ * host byte order.
+ * The payload is not read (CMD_READ may have one, but at this point it is
+ * not known whether it is present).
+ * Returns 0 on success, or the negative value returned by nbd_read().
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
+                                    Error **errp)
+{
+    uint8_t *rest = (uint8_t *)reply + sizeof(reply->magic);
+    size_t rest_size = sizeof(*reply) - sizeof(reply->magic);
+    int ret;
+
+    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, rest, rest_size, errp);
+    if (ret >= 0) {
+        be32_to_cpus(&reply->error);
+        be64_to_cpus(&reply->handle);
+        ret = 0;
+    }
+
+    return ret;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read the rest of a structured reply chunk header; the magic field must
+ * already have been read and converted. The flags, type, handle and length
+ * fields are converted to host byte order.
+ * The payload is not read.
+ * Returns 0 on success, or the negative value returned by nbd_read().
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
+                                              NBDStructuredReplyChunk *chunk,
+                                              Error **errp)
+{
+    uint8_t *rest = (uint8_t *)chunk + sizeof(chunk->magic);
+    size_t rest_size = sizeof(*chunk) - sizeof(chunk->magic);
+    int ret;
+
+    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, rest, rest_size, errp);
+    if (ret >= 0) {
+        be16_to_cpus(&chunk->flags);
+        be16_to_cpus(&chunk->type);
+        be64_to_cpus(&chunk->handle);
+        be32_to_cpus(&chunk->length);
+        ret = 0;
+    }
+
+    return ret;
+}
+
/* nbd_receive_reply
* Returns 1 on success
* 0 on eof, when no data was read (errp is not set)
@@ -921,37 +978,47 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
*/
int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
{
- uint8_t buf[NBD_REPLY_SIZE];
- uint32_t magic;
int ret;
- ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
+ ret = nbd_read_eof(ioc, &reply->magic, sizeof(reply->magic), errp);
if (ret <= 0) {
return ret;
}
- /* Reply
- [ 0 .. 3] magic (NBD_SIMPLE_REPLY_MAGIC)
- [ 4 .. 7] error (0 == no error)
- [ 7 .. 15] handle
- */
-
- magic = ldl_be_p(buf);
- reply->error = ldl_be_p(buf + 4);
- reply->handle = ldq_be_p(buf + 8);
+ be32_to_cpus(&reply->magic);
- reply->error = nbd_errno_to_system_errno(reply->error);
+ switch (reply->magic) {
+ case NBD_SIMPLE_REPLY_MAGIC:
+ ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
+ if (ret < 0) {
+ break;
+ }
- if (reply->error == ESHUTDOWN) {
- /* This works even on mingw which lacks a native ESHUTDOWN */
- error_setg(errp, "server shutting down");
+ trace_nbd_receive_simple_reply(reply->simple.error,
+ nbd_err_lookup(reply->simple.error),
+ reply->handle);
+ if (reply->simple.error == NBD_ESHUTDOWN) {
+ /* This works even on mingw which lacks a native ESHUTDOWN */
+ error_setg(errp, "server shutting down");
+ return -EINVAL;
+ }
+ break;
+ case NBD_STRUCTURED_REPLY_MAGIC:
+ ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
+ if (ret < 0) {
+ break;
+ }
+ trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
+ reply->structured.type,
+ reply->structured.handle,
+ reply->structured.length);
+ break;
+ default:
+ error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
return -EINVAL;
}
- trace_nbd_receive_reply(magic, reply->error, reply->handle);
-
- if (magic != NBD_SIMPLE_REPLY_MAGIC) {
- error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
- return -EINVAL;
+ if (ret < 0) {
+ return ret;
}
return 1;
diff --git a/nbd/common.c b/nbd/common.c
index 59a5316be9..6047d71748 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "trace.h"
#include "nbd-internal.h"
/* Discard length bytes from channel. Return -errno on failure and 0 on
@@ -148,3 +149,86 @@ const char *nbd_cmd_lookup(uint16_t cmd)
return "<unknown>";
}
}
+
+
+/* Map a structured reply chunk type to a short name for messages.
+ * Unrecognised types yield "<unknown error>" when bit 15 (the error bit) is
+ * set and "<unknown>" otherwise. */
+const char *nbd_reply_type_lookup(uint16_t type)
+{
+    const char *name;
+
+    switch (type) {
+    case NBD_REPLY_TYPE_NONE:
+        name = "none";
+        break;
+    case NBD_REPLY_TYPE_OFFSET_DATA:
+        name = "data";
+        break;
+    case NBD_REPLY_TYPE_OFFSET_HOLE:
+        name = "hole";
+        break;
+    case NBD_REPLY_TYPE_ERROR:
+        name = "generic error";
+        break;
+    case NBD_REPLY_TYPE_ERROR_OFFSET:
+        name = "error at offset";
+        break;
+    default:
+        name = (type & (1 << 15)) ? "<unknown error>" : "<unknown>";
+        break;
+    }
+
+    return name;
+}
+
+
+/* Map an NBD wire error code (NBD_E*) to its name for messages;
+ * codes not listed here yield "<unknown>". */
+const char *nbd_err_lookup(int err)
+{
+ switch (err) {
+ case NBD_SUCCESS:
+ return "success";
+ case NBD_EPERM:
+ return "EPERM";
+ case NBD_EIO:
+ return "EIO";
+ case NBD_ENOMEM:
+ return "ENOMEM";
+ case NBD_EINVAL:
+ return "EINVAL";
+ case NBD_ENOSPC:
+ return "ENOSPC";
+ case NBD_EOVERFLOW:
+ return "EOVERFLOW";
+ case NBD_ESHUTDOWN:
+ return "ESHUTDOWN";
+ default:
+ return "<unknown>";
+ }
+}
+
+
+/* Translate an NBD wire error code (NBD_E*) into the host's errno value.
+ * NBD_SUCCESS maps to 0. Codes not listed here are traced with
+ * trace_nbd_unknown_error() and then treated like NBD_EINVAL.
+ * The result is a positive errno (or 0); callers negate it themselves. */
+int nbd_errno_to_system_errno(int err)
+{
+ int ret;
+ switch (err) {
+ case NBD_SUCCESS:
+ ret = 0;
+ break;
+ case NBD_EPERM:
+ ret = EPERM;
+ break;
+ case NBD_EIO:
+ ret = EIO;
+ break;
+ case NBD_ENOMEM:
+ ret = ENOMEM;
+ break;
+ case NBD_ENOSPC:
+ ret = ENOSPC;
+ break;
+ case NBD_EOVERFLOW:
+ ret = EOVERFLOW;
+ break;
+ case NBD_ESHUTDOWN:
+ ret = ESHUTDOWN;
+ break;
+ default:
+ /* default is deliberately placed before NBD_EINVAL so that unknown
+ * codes share its result after being traced */
+ trace_nbd_unknown_error(err);
+ /* fallthrough */
+ case NBD_EINVAL:
+ ret = EINVAL;
+ break;
+ }
+ return ret;
+}
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 11a130d050..eeff78d3c9 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -47,7 +47,6 @@
#define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)
#define NBD_REQUEST_MAGIC 0x25609513
-#define NBD_SIMPLE_REPLY_MAGIC 0x67446698
#define NBD_OPTS_MAGIC 0x49484156454F5054LL
#define NBD_CLIENT_MAGIC 0x0000420281861253LL
#define NBD_REP_MAGIC 0x0003e889045565a9LL
@@ -64,18 +63,6 @@
#define NBD_SET_TIMEOUT _IO(0xab, 9)
#define NBD_SET_FLAGS _IO(0xab, 10)
-/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
- * but only a limited set of errno values is specified in the protocol.
- * Everything else is squashed to EINVAL.
- */
-#define NBD_SUCCESS 0
-#define NBD_EPERM 1
-#define NBD_EIO 5
-#define NBD_ENOMEM 12
-#define NBD_EINVAL 22
-#define NBD_ENOSPC 28
-#define NBD_ESHUTDOWN 108
-
/* nbd_read_eof
* Tries to read @size bytes from @ioc.
* Returns 1 on success
@@ -95,15 +82,6 @@ static inline int nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
return ret;
}
-/* nbd_read
- * Reads @size bytes from @ioc. Returns 0 on success.
- */
-static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
- Error **errp)
-{
- return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
-}
-
/* nbd_write
* Writes @size bytes to @ioc. Returns 0 on success.
*/
@@ -126,6 +104,7 @@ const char *nbd_opt_lookup(uint32_t opt);
const char *nbd_rep_lookup(uint32_t rep);
const char *nbd_info_lookup(uint16_t info);
const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_err_lookup(int err);
int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
diff --git a/nbd/server.c b/nbd/server.c
index 3df3548d6d..70b40ed27e 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -40,6 +40,8 @@ static int system_errno_to_nbd_errno(int err)
case EFBIG:
case ENOSPC:
return NBD_ENOSPC;
+ case EOVERFLOW:
+ return NBD_EOVERFLOW;
case ESHUTDOWN:
return NBD_ESHUTDOWN;
case EINVAL:
@@ -98,6 +100,8 @@ struct NBDClient {
QTAILQ_ENTRY(NBDClient) next;
int nb_requests;
bool closing;
+
+ bool structured_reply;
};
/* That's all folks */
@@ -251,21 +255,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp,
/* Process the NBD_OPT_LIST command, with a potential series of replies.
* Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
- Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
{
NBDExport *exp;
- if (length) {
- if (nbd_drop(client->ioc, length, errp) < 0) {
- return -EIO;
- }
- return nbd_negotiate_send_rep_err(client->ioc,
- NBD_REP_ERR_INVALID, NBD_OPT_LIST,
- errp,
- "OPT_LIST should not have length");
- }
-
/* For each export, send a NBD_REP_SERVER reply. */
QTAILQ_FOREACH(exp, &exports, next) {
if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -529,7 +522,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint32_t length,
/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
* new channel for all further (now-encrypted) communication. */
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
- uint32_t length,
Error **errp)
{
QIOChannel *ioc;
@@ -538,15 +530,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
trace_nbd_negotiate_handle_starttls();
ioc = client->ioc;
- if (length) {
- if (nbd_drop(ioc, length, errp) < 0) {
- return NULL;
- }
- nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
- errp,
- "OPT_STARTTLS should not have length");
- return NULL;
- }
if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
NBD_OPT_STARTTLS, errp) < 0) {
@@ -582,6 +565,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
return QIO_CHANNEL(tioc);
}
+/* nbd_reject_length: Handle an option that arrived with unexpected payload.
+ * The @length payload bytes are dropped, then @option is answered with an
+ * NBD_REP_ERR_INVALID error reply.  @length must be non-zero.
+ * @fatal requests that we quit talking to the client, even if the error
+ * reply was sent successfully.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0       error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+                             uint32_t option, bool fatal, Error **errp)
+{
+    int rc;
+
+    assert(length);
+
+    /* Discard the payload so the stream stays in sync */
+    if (nbd_drop(client->ioc, length, errp) < 0) {
+        return -EIO;
+    }
+
+    rc = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+                                    option, errp,
+                                    "option '%s' should have zero length",
+                                    nbd_opt_lookup(option));
+    if (rc || !fatal) {
+        /* Either sending failed, or the caller lets the client carry on */
+        return rc;
+    }
+
+    /* The reply went out, but the caller still wants the connection gone */
+    error_setg(errp, "option '%s' should have zero length",
+               nbd_opt_lookup(option));
+    return -EINVAL;
+}
+
/* nbd_negotiate_options
* Process all NBD_OPT_* client option commands, during fixed newstyle
* negotiation.
@@ -672,10 +683,17 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
}
switch (option) {
case NBD_OPT_STARTTLS:
- tioc = nbd_negotiate_handle_starttls(client, length, errp);
+ if (length) {
+ /* Unconditionally drop the connection if the client
+ * can't start a TLS negotiation correctly */
+ return nbd_reject_length(client, length, option, true,
+ errp);
+ }
+ tioc = nbd_negotiate_handle_starttls(client, errp);
if (!tioc) {
return -EIO;
}
+ ret = 0;
object_unref(OBJECT(client->ioc));
client->ioc = QIO_CHANNEL(tioc);
break;
@@ -696,9 +714,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
"Option 0x%" PRIx32
"not permitted before TLS",
option);
- if (ret < 0) {
- return ret;
- }
/* Let the client keep trying, unless they asked to
* quit. In this mode, we've already sent an error, so
* we can't ack the abort. */
@@ -710,9 +725,11 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
} else if (fixedNewstyle) {
switch (option) {
case NBD_OPT_LIST:
- ret = nbd_negotiate_handle_list(client, length, errp);
- if (ret < 0) {
- return ret;
+ if (length) {
+ ret = nbd_reject_length(client, length, option, false,
+ errp);
+ } else {
+ ret = nbd_negotiate_handle_list(client, errp);
}
break;
@@ -736,16 +753,13 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
assert(option == NBD_OPT_GO);
return 0;
}
- if (ret) {
- return ret;
- }
break;
case NBD_OPT_STARTTLS:
- if (nbd_drop(client->ioc, length, errp) < 0) {
- return -EIO;
- }
- if (client->tlscreds) {
+ if (length) {
+ ret = nbd_reject_length(client, length, option, false,
+ errp);
+ } else if (client->tlscreds) {
ret = nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_INVALID,
option, errp,
@@ -756,10 +770,24 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
option, errp,
"TLS not configured");
}
- if (ret < 0) {
- return ret;
+ break;
+
+ case NBD_OPT_STRUCTURED_REPLY:
+ if (length) {
+ ret = nbd_reject_length(client, length, option, false,
+ errp);
+ } else if (client->structured_reply) {
+ ret = nbd_negotiate_send_rep_err(
+ client->ioc, NBD_REP_ERR_INVALID, option, errp,
+ "structured reply already negotiated");
+ } else {
+ ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+ option, errp);
+ client->structured_reply = true;
+ myflags |= NBD_FLAG_SEND_DF;
}
break;
+
default:
if (nbd_drop(client->ioc, length, errp) < 0) {
return -EIO;
@@ -770,9 +798,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
"Unsupported option 0x%"
PRIx32 " (%s)", option,
nbd_opt_lookup(option));
- if (ret < 0) {
- return ret;
- }
break;
}
} else {
@@ -792,6 +817,9 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
return -EINVAL;
}
}
+ if (ret < 0) {
+ return ret;
+ }
}
}
@@ -1227,12 +1255,68 @@ static int nbd_co_send_simple_reply(NBDClient *client,
{.iov_base = data, .iov_len = len}
};
- trace_nbd_co_send_simple_reply(handle, nbd_err, len);
+ trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
+ len);
set_be_simple_reply(&reply, nbd_err, handle);
return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
}
+/* set_be_chunk
+ * Fill in every field of the structured reply header @chunk: the magic is
+ * always NBD_STRUCTURED_REPLY_MAGIC, the remaining fields come from
+ * @flags, @type, @handle and @length.  All values are written through the
+ * st*_be_p() store helpers.
+ */
+static inline void set_be_chunk(NBDStructuredReplyChunk *chunk,
+                                uint16_t flags, uint16_t type,
+                                uint64_t handle, uint32_t length)
+{
+    stl_be_p(&chunk->magic,  NBD_STRUCTURED_REPLY_MAGIC);
+    stw_be_p(&chunk->flags,  flags);
+    stw_be_p(&chunk->type,   type);
+    stq_be_p(&chunk->handle, handle);
+    stl_be_p(&chunk->length, length);
+}
+
+/* nbd_co_send_structured_read
+ * Send one structured reply chunk of type NBD_REPLY_TYPE_OFFSET_DATA,
+ * flagged NBD_REPLY_FLAG_DONE, describing @size bytes of @data at @offset
+ * for the request identified by @handle.
+ * Returns the result of nbd_co_send_iov().
+ */
+static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
+                                                    uint64_t handle,
+                                                    uint64_t offset,
+                                                    void *data,
+                                                    size_t size,
+                                                    Error **errp)
+{
+    NBDStructuredRead chunk;
+    struct iovec iov[2];
+
+    /* Header first, payload second */
+    iov[0].iov_base = &chunk;
+    iov[0].iov_len = sizeof(chunk);
+    iov[1].iov_base = data;
+    iov[1].iov_len = size;
+
+    trace_nbd_co_send_structured_read(handle, offset, data, size);
+
+    /* The chunk length covers everything that follows the common header
+     * chunk.h: the rest of NBDStructuredRead plus the data itself. */
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_OFFSET_DATA,
+                 handle, sizeof(chunk) - sizeof(chunk.h) + size);
+    stq_be_p(&chunk.offset, offset);
+
+    return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+/* nbd_co_send_structured_error
+ * Send one structured reply chunk of type NBD_REPLY_TYPE_ERROR, flagged
+ * NBD_REPLY_FLAG_DONE, for the request identified by @handle.
+ * @error is a host errno value; it is converted with
+ * system_errno_to_nbd_errno() and the result must be non-zero.
+ * @msg is an optional human-readable message (may be NULL); when it is
+ * NULL or empty, only the fixed-size part of the chunk is sent.
+ * Returns the result of nbd_co_send_iov().
+ */
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+                                                     uint64_t handle,
+                                                     uint32_t error,
+                                                     const char *msg,
+                                                     Error **errp)
+{
+    NBDStructuredError chunk;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    size_t msg_len = msg ? strlen(msg) : 0;
+    struct iovec iov[2];
+
+    iov[0].iov_base = &chunk;
+    iov[0].iov_len = sizeof(chunk);
+    iov[1].iov_base = (char *)msg;
+    iov[1].iov_len = msg_len;
+
+    assert(nbd_err);
+    trace_nbd_co_send_structured_error(handle, nbd_err,
+                                       nbd_err_lookup(nbd_err),
+                                       msg ? msg : "");
+
+    /* The chunk length covers everything that follows the common header
+     * chunk.h, including the message text. */
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + msg_len);
+    stl_be_p(&chunk.error, nbd_err);
+    /* NOTE(review): stw_be_p() looks like a 16-bit store, so @msg is
+     * presumably expected to stay below 64KiB -- confirm against callers. */
+    stw_be_p(&chunk.message_length, msg_len);
+
+    /* Skip the second iovec entirely when there is no message */
+    return nbd_co_send_iov(client, iov, msg_len ? 2 : 1, errp);
+}
+
/* nbd_co_receive_request
* Collect a client request. Return 0 if request looks valid, -EIO to drop
* connection right away, and any other negative value to report an error to
@@ -1243,6 +1327,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
Error **errp)
{
NBDClient *client = req->client;
+ int valid_flags;
g_assert(qemu_in_coroutine());
assert(client->recv_coroutine == qemu_coroutine_self());
@@ -1304,13 +1389,15 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
(uint64_t)client->exp->size);
return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
}
- if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
- error_setg(errp, "unsupported flags (got 0x%x)", request->flags);
- return -EINVAL;
+ valid_flags = NBD_CMD_FLAG_FUA;
+ if (request->type == NBD_CMD_READ && client->structured_reply) {
+ valid_flags |= NBD_CMD_FLAG_DF;
+ } else if (request->type == NBD_CMD_WRITE_ZEROES) {
+ valid_flags |= NBD_CMD_FLAG_NO_HOLE;
}
- if (request->type != NBD_CMD_WRITE_ZEROES &&
- (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
- error_setg(errp, "unexpected flags (got 0x%x)", request->flags);
+ if (request->flags & ~valid_flags) {
+ error_setg(errp, "unsupported flags for command %s (got 0x%x)",
+ nbd_cmd_lookup(request->type), request->flags);
return -EINVAL;
}
@@ -1328,6 +1415,7 @@ static coroutine_fn void nbd_trip(void *opaque)
int flags;
int reply_data_len = 0;
Error *local_err = NULL;
+ char *msg = NULL;
trace_nbd_trip();
if (client->closing) {
@@ -1378,6 +1466,7 @@ static coroutine_fn void nbd_trip(void *opaque)
break;
case NBD_CMD_WRITE:
if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+ error_setg(&local_err, "Export is read-only");
ret = -EROFS;
break;
}
@@ -1395,7 +1484,7 @@ static coroutine_fn void nbd_trip(void *opaque)
break;
case NBD_CMD_WRITE_ZEROES:
if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
- error_setg(&local_err, "Server is read-only, return error");
+ error_setg(&local_err, "Export is read-only");
ret = -EROFS;
break;
}
@@ -1443,14 +1532,29 @@ reply:
if (local_err) {
/* If we get here, local_err was not a fatal error, and should be sent
* to the client. */
+ assert(ret < 0);
+ msg = g_strdup(error_get_pretty(local_err));
error_report_err(local_err);
local_err = NULL;
}
- if (nbd_co_send_simple_reply(req->client, request.handle,
- ret < 0 ? -ret : 0,
- req->data, reply_data_len, &local_err) < 0)
- {
+ if (client->structured_reply &&
+ (ret < 0 || request.type == NBD_CMD_READ)) {
+ if (ret < 0) {
+ ret = nbd_co_send_structured_error(req->client, request.handle,
+ -ret, msg, &local_err);
+ } else {
+ ret = nbd_co_send_structured_read(req->client, request.handle,
+ request.from, req->data,
+ reply_data_len, &local_err);
+ }
+ } else {
+ ret = nbd_co_send_simple_reply(req->client, request.handle,
+ ret < 0 ? -ret : 0,
+ req->data, reply_data_len, &local_err);
+ }
+ g_free(msg);
+ if (ret < 0) {
error_prepend(&local_err, "Failed to send reply: ");
goto disconnect;
}
diff --git a/nbd/trace-events b/nbd/trace-events
index e27614f050..4a13757524 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -1,5 +1,4 @@
# nbd/client.c
-nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending option request %" PRIu32" (%s), len %" PRIu32
nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, const char *typename, uint32_t length) "Received option reply 0x%" PRIx32" (%s), type 0x%" PRIx32" (%s), len %" PRIu32
nbd_reply_err_unsup(uint32_t option, const char *name) "server doesn't understand request 0x%" PRIx32 " (%s), attempting fallback"
@@ -9,9 +8,7 @@ nbd_opt_go_info_unknown(int info, const char *name) "Ignoring unknown info %d (%
nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'"
nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'"
-nbd_receive_starttls_request(void) "Requesting TLS from server"
-nbd_receive_starttls_reply(void) "Getting TLS reply from server"
-nbd_receive_starttls_new_client(void) "TLS request approved, setting up TLS"
+nbd_receive_starttls_new_client(void) "Setting up TLS"
nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake"
nbd_receive_negotiate(void *tlscreds, const char *hostname) "Receiving negotiation tlscreds=%p hostname=%s"
nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64
@@ -29,7 +26,11 @@ nbd_client_loop_ret(int ret, const char *error) "NBD loop returned %d: %s"
nbd_client_clear_queue(void) "Clearing NBD queue"
nbd_client_clear_socket(void) "Clearing NBD socket"
nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
-nbd_receive_reply(uint32_t magic, int32_t error, uint64_t handle) "Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }"
+nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }"
+nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d, handle = %" PRIu64 ", length = %" PRIu32 " }"
+
+# nbd/common.c
+nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
# nbd/server.c
nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, const char *typename, uint32_t len) "Reply opt=0x%" PRIx32 " (%s), type=0x%" PRIx32 " (%s), len=%" PRIu32
@@ -53,7 +54,9 @@ nbd_negotiate_success(void) "Negotiation succeeded"
nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching clients to AIO context %p\n"
nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients from AIO context %p\n"
-nbd_co_send_simple_reply(uint64_t handle, uint32_t error, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 ", len = %d"
+nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), len = %d"
+nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu"
+nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'"
nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32
nbd_co_receive_request_cmd_write(uint32_t len) "Reading %" PRIu32 " byte(s)"
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index 25dde519e3..be6079d27e 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -41,6 +41,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo
=== Check disconnect after neg2 ===
+Connection closed
read failed: Input/output error
=== Check disconnect 8 neg2 ===
@@ -53,32 +54,39 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo
=== Check disconnect before request ===
+Connection closed
read failed: Input/output error
=== Check disconnect after request ===
+Connection closed
read failed: Input/output error
=== Check disconnect before reply ===
+Connection closed
read failed: Input/output error
=== Check disconnect after reply ===
+Unexpected end-of-file before all bytes were read
read failed: Input/output error
=== Check disconnect 4 reply ===
Unexpected end-of-file before all bytes were read
+Connection closed
read failed: Input/output error
=== Check disconnect 8 reply ===
Unexpected end-of-file before all bytes were read
+Connection closed
read failed: Input/output error
=== Check disconnect before data ===
+Unexpected end-of-file before all bytes were read
read failed: Input/output error
=== Check disconnect after data ===
@@ -108,6 +116,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/
=== Check disconnect after neg-classic ===
+Connection closed
read failed: Input/output error
=== Check disconnect before neg1 ===
@@ -168,28 +177,34 @@ read failed: Input/output error
=== Check disconnect after request ===
+Connection closed
read failed: Input/output error
=== Check disconnect before reply ===
+Connection closed
read failed: Input/output error
=== Check disconnect after reply ===
+Unexpected end-of-file before all bytes were read
read failed: Input/output error
=== Check disconnect 4 reply ===
Unexpected end-of-file before all bytes were read
+Connection closed
read failed: Input/output error
=== Check disconnect 8 reply ===
Unexpected end-of-file before all bytes were read
+Connection closed
read failed: Input/output error
=== Check disconnect before data ===
+Unexpected end-of-file before all bytes were read
read failed: Input/output error
=== Check disconnect after data ===