diff options
75 files changed, 4115 insertions, 942 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 653f52e5b3..82c814a919 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1016,6 +1016,13 @@ F: include/sysemu/rng*.h F: backends/rng*.c F: tests/virtio-rng-test.c +virtio-crypto +M: Gonglei <arei.gonglei@huawei.com> +S: Supported +F: hw/virtio/virtio-crypto.c +F: hw/virtio/virtio-crypto-pci.c +F: include/hw/virtio/virtio-crypto.h + nvme M: Keith Busch <keith.busch@intel.com> L: qemu-block@nongnu.org @@ -1261,6 +1268,12 @@ S: Maintained F: backends/hostmem*.c F: include/sysemu/hostmem.h +Cryptodev Backends +M: Gonglei <arei.gonglei@huawei.com> +S: Maintained +F: include/sysemu/cryptodev*.h +F: backends/cryptodev*.c + QAPI M: Markus Armbruster <armbru@redhat.com> M: Michael Roth <mdroth@linux.vnet.ibm.com> diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 31a3a894f5..18469980e6 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -9,3 +9,6 @@ common-obj-$(CONFIG_TPM) += tpm.o common-obj-y += hostmem.o hostmem-ram.o common-obj-$(CONFIG_LINUX) += hostmem-file.o + +common-obj-y += cryptodev.o +common-obj-y += cryptodev-builtin.o diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c new file mode 100644 index 0000000000..eda954b2a2 --- /dev/null +++ b/backends/cryptodev-builtin.c @@ -0,0 +1,361 @@ +/* + * QEMU Cryptodev backend for QEMU cipher APIs + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "sysemu/cryptodev.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "standard-headers/linux/virtio_crypto.h" +#include "crypto/cipher.h" + + +/** + * @TYPE_CRYPTODEV_BACKEND_BUILTIN: + * name of backend that uses QEMU cipher API + */ +#define TYPE_CRYPTODEV_BACKEND_BUILTIN "cryptodev-backend-builtin" + +#define CRYPTODEV_BACKEND_BUILTIN(obj) \ + OBJECT_CHECK(CryptoDevBackendBuiltin, \ + (obj), TYPE_CRYPTODEV_BACKEND_BUILTIN) + +typedef struct CryptoDevBackendBuiltin + CryptoDevBackendBuiltin; + +typedef struct CryptoDevBackendBuiltinSession { + QCryptoCipher *cipher; + uint8_t direction; /* encryption or decryption */ + uint8_t type; /* cipher? hash? aead? */ + QTAILQ_ENTRY(CryptoDevBackendBuiltinSession) next; +} CryptoDevBackendBuiltinSession; + +/* Max number of symmetric sessions */ +#define MAX_NUM_SESSIONS 256 + +#define CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN 512 +#define CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN 64 + +struct CryptoDevBackendBuiltin { + CryptoDevBackend parent_obj; + + CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS]; +}; + +static void cryptodev_builtin_init( + CryptoDevBackend *backend, Error **errp) +{ + /* Only support one queue */ + int queues = backend->conf.peers.queues; + CryptoDevBackendClient *cc; + + if (queues != 1) { + error_setg(errp, + "Only support one queue in cryptdov-builtin backend"); + return; + } + + cc = cryptodev_backend_new_client( + "cryptodev-builtin", NULL); + cc->info_str = g_strdup_printf("cryptodev-builtin0"); + cc->queue_index = 0; + backend->conf.peers.ccs[0] = cc; + + backend->conf.crypto_services = + 1u << VIRTIO_CRYPTO_SERVICE_CIPHER | + 1u << VIRTIO_CRYPTO_SERVICE_HASH | + 1u << VIRTIO_CRYPTO_SERVICE_MAC; + backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC; + backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1; + /* + * Set the Maximum length of crypto request. + * Why this value? Just avoid to overflow when + * memory allocation for each crypto request. + */ + backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo); + backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN; + backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN; +} + +static int +cryptodev_builtin_get_unused_session_index( + CryptoDevBackendBuiltin *builtin) +{ + size_t i; + + for (i = 0; i < MAX_NUM_SESSIONS; i++) { + if (builtin->sessions[i] == NULL) { + return i; + } + } + + return -1; +} + +static int +cryptodev_builtin_get_aes_algo(uint32_t key_len, Error **errp) +{ + int algo; + + if (key_len == 128 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_128; + } else if (key_len == 192 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_192; + } else if (key_len == 256 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_256; + } else { + error_setg(errp, "Unsupported key length :%u", key_len); + return -1; + } + + return algo; +} + +static int cryptodev_builtin_create_cipher_session( + CryptoDevBackendBuiltin *builtin, + CryptoDevBackendSymSessionInfo *sess_info, + Error **errp) +{ + int algo; + int mode; + QCryptoCipher *cipher; + int index; + CryptoDevBackendBuiltinSession *sess; + + if (sess_info->op_type != VIRTIO_CRYPTO_SYM_OP_CIPHER) { + error_setg(errp, "Unsupported optype :%u", sess_info->op_type); + return -1; + } + + index = cryptodev_builtin_get_unused_session_index(builtin); + if (index < 0) { + error_setg(errp, "Total number of sessions created exceeds %u", + MAX_NUM_SESSIONS); + return -1; + } + + switch (sess_info->cipher_alg) { + case VIRTIO_CRYPTO_CIPHER_AES_ECB: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_ECB; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CBC: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CTR: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_DES_ECB: + algo = QCRYPTO_CIPHER_ALG_DES_RFB; + mode = QCRYPTO_CIPHER_MODE_ECB; + break; + default: + error_setg(errp, "Unsupported cipher alg :%u", + sess_info->cipher_alg); + return -1; + } + + cipher = qcrypto_cipher_new(algo, mode, + sess_info->cipher_key, + sess_info->key_len, + errp); + if (!cipher) { + return -1; + } + + sess = g_new0(CryptoDevBackendBuiltinSession, 1); + sess->cipher = cipher; + sess->direction = sess_info->direction; + sess->type = sess_info->op_type; + + builtin->sessions[index] = sess; + + return index; +} + +static int64_t cryptodev_builtin_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + int64_t session_id = -1; + int ret; + + switch (sess_info->op_code) { + case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + ret = cryptodev_builtin_create_cipher_session( + builtin, sess_info, errp); + if (ret < 0) { + return ret; + } else { + session_id = ret; + } + break; + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: + case VIRTIO_CRYPTO_MAC_CREATE_SESSION: + default: + error_setg(errp, "Unsupported opcode :%" PRIu32 "", + sess_info->op_code); + return -1; + } + + return session_id; +} + +static int cryptodev_builtin_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + + if (session_id >= MAX_NUM_SESSIONS || + builtin->sessions[session_id] == NULL) { + error_setg(errp, "Cannot find a valid session id: %" PRIu64 "", + session_id); + return -1; + } + + qcrypto_cipher_free(builtin->sessions[session_id]->cipher); + g_free(builtin->sessions[session_id]); + builtin->sessions[session_id] = NULL; + return 0; +} + +static int cryptodev_builtin_sym_operation( + CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + CryptoDevBackendBuiltinSession *sess; + int ret; + + if (op_info->session_id >= MAX_NUM_SESSIONS || + builtin->sessions[op_info->session_id] == NULL) { + error_setg(errp, "Cannot find a valid session id: %" PRIu64 "", + op_info->session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + + if (op_info->op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + error_setg(errp, + "Algorithm chain is unsupported for cryptdoev-builtin"); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + sess = builtin->sessions[op_info->session_id]; + + ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv, + op_info->iv_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + + if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) { + ret = qcrypto_cipher_encrypt(sess->cipher, op_info->src, + op_info->dst, op_info->src_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + } else { + ret = qcrypto_cipher_decrypt(sess->cipher, op_info->src, + op_info->dst, op_info->src_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + } + return VIRTIO_CRYPTO_OK; +} + +static void cryptodev_builtin_cleanup( + CryptoDevBackend *backend, + Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + size_t i; + int queues = backend->conf.peers.queues; + CryptoDevBackendClient *cc; + + for (i = 0; i < MAX_NUM_SESSIONS; i++) { + if (builtin->sessions[i] != NULL) { + cryptodev_builtin_sym_close_session( + backend, i, 0, errp); + } + } + + assert(queues == 1); + + for (i = 0; i < queues; i++) { + cc = backend->conf.peers.ccs[i]; + if (cc) { + cryptodev_backend_free_client(cc); + backend->conf.peers.ccs[i] = NULL; + } + } +} + +static void +cryptodev_builtin_class_init(ObjectClass *oc, void *data) +{ + CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_CLASS(oc); + + bc->init = cryptodev_builtin_init; + bc->cleanup = cryptodev_builtin_cleanup; + bc->create_session = cryptodev_builtin_sym_create_session; + bc->close_session = cryptodev_builtin_sym_close_session; + bc->do_sym_op = cryptodev_builtin_sym_operation; +} + +static const TypeInfo cryptodev_builtin_info = { + .name = TYPE_CRYPTODEV_BACKEND_BUILTIN, + .parent = TYPE_CRYPTODEV_BACKEND, + .class_init = cryptodev_builtin_class_init, + .instance_size = sizeof(CryptoDevBackendBuiltin), +}; + +static void +cryptodev_builtin_register_types(void) +{ + type_register_static(&cryptodev_builtin_info); +} + +type_init(cryptodev_builtin_register_types); diff --git a/backends/cryptodev.c b/backends/cryptodev.c new file mode 100644 index 0000000000..4a49f9762f --- /dev/null +++ b/backends/cryptodev.c @@ -0,0 +1,245 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "sysemu/cryptodev.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qapi-types.h" +#include "qapi-visit.h" +#include "qemu/config-file.h" +#include "qom/object_interfaces.h" +#include "hw/virtio/virtio-crypto.h" + + +static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients; + + +CryptoDevBackendClient * +cryptodev_backend_new_client(const char *model, + const char *name) +{ + CryptoDevBackendClient *cc; + + cc = g_malloc0(sizeof(CryptoDevBackendClient)); + cc->model = g_strdup(model); + if (name) { + cc->name = g_strdup(name); + } + + QTAILQ_INSERT_TAIL(&crypto_clients, cc, next); + + return cc; +} + +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc) +{ + QTAILQ_REMOVE(&crypto_clients, cc, next); + g_free(cc->name); + g_free(cc->model); + g_free(cc->info_str); + g_free(cc); +} + +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->cleanup) { + bc->cleanup(backend, errp); + } + + backend->ready = false; +} + +int64_t cryptodev_backend_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->create_session) { + return bc->create_session(backend, sess_info, queue_index, errp); + } + + return -1; +} + +int cryptodev_backend_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->close_session) { + return bc->close_session(backend, session_id, queue_index, errp); + } + + return -1; +} + +static int cryptodev_backend_sym_operation( + CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->do_sym_op) { + return bc->do_sym_op(backend, op_info, queue_index, errp); + } + + return -VIRTIO_CRYPTO_ERR; +} + +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + void *opaque, + uint32_t queue_index, Error **errp) +{ + VirtIOCryptoReq *req = opaque; + + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + CryptoDevBackendSymOpInfo *op_info; + op_info = req->u.sym_op_info; + + return cryptodev_backend_sym_operation(backend, + op_info, queue_index, errp); + } else { + error_setg(errp, "Unsupported cryptodev alg type: %" PRIu32 "", + req->flags); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + return -VIRTIO_CRYPTO_ERR; +} + +static void +cryptodev_backend_get_queues(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + uint32_t value = backend->conf.peers.queues; + + visit_type_uint32(v, name, &value, errp); +} + +static void +cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%" + PRIu32 "'", object_get_typename(obj), name, value); + goto out; + } + backend->conf.peers.queues = value; +out: + error_propagate(errp, local_err); +} + +static void +cryptodev_backend_complete(UserCreatable *uc, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc); + CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc); + Error *local_err = NULL; + + if (bc->init) { + bc->init(backend, &local_err); + if (local_err) { + goto out; + } + } + backend->ready = true; + return; + +out: + backend->ready = false; + error_propagate(errp, local_err); +} + +static void cryptodev_backend_instance_init(Object *obj) +{ + object_property_add(obj, "queues", "int", + cryptodev_backend_get_queues, + cryptodev_backend_set_queues, + NULL, NULL, NULL); + /* Initialize devices' queues property to 1 */ + object_property_set_int(obj, 1, "queues", NULL); +} + +static void cryptodev_backend_finalize(Object *obj) +{ + +} + +static void +cryptodev_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = cryptodev_backend_complete; + + QTAILQ_INIT(&crypto_clients); +} + +static const TypeInfo cryptodev_backend_info = { + .name = TYPE_CRYPTODEV_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(CryptoDevBackend), + .instance_init = cryptodev_backend_instance_init, + .instance_finalize = cryptodev_backend_finalize, + .class_size = sizeof(CryptoDevBackendClass), + .class_init = cryptodev_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void +cryptodev_backend_register_types(void) +{ + type_register_static(&cryptodev_backend_info); +} + +type_init(cryptodev_backend_register_types); diff --git a/block/nbd-client.c b/block/nbd-client.c index 2cf3237ef3..2a302de674 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -1,6 +1,7 @@ /* * QEMU Block driver for NBD * + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2008 Bull S.A.S. * Author: Laurent Vivier <Laurent.Vivier@bull.net> * @@ -32,7 +33,7 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) -static void nbd_recv_coroutines_enter_all(NbdClientSession *s) +static void nbd_recv_coroutines_enter_all(NBDClientSession *s) { int i; @@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s) static void nbd_teardown_connection(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); if (!client->ioc) { /* Already closed */ return; @@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) static void nbd_reply_ready(void *opaque) { BlockDriverState *bs = opaque; - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); uint64_t i; int ret; @@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BlockDriverState *bs, - struct nbd_request *request, + NBDRequest *request, QEMUIOVector *qiov) { - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; int rc, ret, i; @@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs, return rc; } -static void nbd_co_receive_reply(NbdClientSession *s, - struct nbd_request *request, - struct nbd_reply *reply, +static void nbd_co_receive_reply(NBDClientSession *s, + NBDRequest *request, + NBDReply *reply, QEMUIOVector *qiov) { int ret; @@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s, } } -static void nbd_coroutine_start(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_start(NBDClientSession *s, + NBDRequest *request) { /* Poor man semaphore. The free_sema is locked when no other request * can be accepted, and unlocked after receiving one reply. */ - if (s->in_flight >= MAX_NBD_REQUESTS - 1) { - qemu_co_mutex_lock(&s->free_sema); + if (s->in_flight == MAX_NBD_REQUESTS) { + qemu_co_queue_wait(&s->free_sema); assert(s->in_flight < MAX_NBD_REQUESTS); } s->in_flight++; @@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s, /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ } -static void nbd_coroutine_end(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_end(NBDClientSession *s, + NBDRequest *request) { int i = HANDLE_TO_INDEX(s, request->handle); s->recv_coroutine[i] = NULL; if (s->in_flight-- == MAX_NBD_REQUESTS) { - qemu_co_mutex_unlock(&s->free_sema); + qemu_co_queue_next(&s->free_sema); } } int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_READ, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_WRITE, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; if (flags & BDRV_REQ_FUA) { assert(client->nbdflags & NBD_FLAG_SEND_FUA); - request.type |= NBD_CMD_FLAG_FUA; + request.flags |= NBD_CMD_FLAG_FUA; } assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + ssize_t ret; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = count, + }; + NBDReply reply; + + if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(client->nbdflags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + + nbd_coroutine_start(client, &request); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + reply.error = -ret; + } else { + nbd_co_receive_reply(client, &request, &reply, NULL); + } + nbd_coroutine_end(client, &request); + return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_FLUSH }; - struct nbd_reply reply; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_FLUSH }; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) { @@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs) int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, .len = count, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) { @@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs, void nbd_client_close(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { - .type = NBD_CMD_DISC, - .from = 0, - .len = 0 - }; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_DISC }; if (client->ioc == NULL) { return; @@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs, const char *hostname, Error **errp) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); int ret; /* NBD handshake */ @@ -386,7 +418,7 @@ int nbd_client_init(BlockDriverState *bs, } qemu_co_mutex_init(&client->send_mutex); - qemu_co_mutex_init(&client->free_sema); + qemu_co_queue_init(&client->free_sema); client->sioc = sioc; object_ref(OBJECT(client->sioc)); diff --git a/block/nbd-client.h b/block/nbd-client.h index 044aca4530..f8d6006849 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -17,24 +17,24 @@ #define MAX_NBD_REQUESTS 16 -typedef struct NbdClientSession { +typedef struct NBDClientSession { QIOChannelSocket *sioc; /* The master data channel */ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ uint16_t nbdflags; off_t size; CoMutex send_mutex; - CoMutex free_sema; + CoQueue free_sema; Coroutine *send_coroutine; int in_flight; Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; - struct nbd_reply reply; + NBDReply reply; bool is_unix; -} NbdClientSession; +} NBDClientSession; -NbdClientSession *nbd_get_client_session(BlockDriverState *bs); +NBDClientSession *nbd_get_client_session(BlockDriverState *bs); int nbd_client_init(BlockDriverState *bs, QIOChannelSocket *sock, @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd.c b/block/nbd.c index 6e837f80c9..9cff8396f9 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -44,7 +44,7 @@ #define EN_OPTSTR ":exportname=" typedef struct BDRVNBDState { - NbdClientSession client; + NBDClientSession client; /* For nbd_refresh_filename() */ SocketAddress *saddr; @@ -294,7 +294,7 @@ done: return saddr; } -NbdClientSession *nbd_get_client_session(BlockDriverState *bs) +NBDClientSession *nbd_get_client_session(BlockDriverState *bs) { BDRVNBDState *s = bs->opaque; return &s->client; @@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; + bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, diff --git a/docs/rcu.txt b/docs/rcu.txt index a70b72c545..c84e7f42b2 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -145,7 +145,7 @@ The core RCU API is small: and then read from there. RCU read-side critical sections must use atomic_rcu_read() to - read data, unless concurrent writes are presented by another + read data, unless concurrent writes are prevented by another synchronization mechanism. Furthermore, RCU read-side critical sections should traverse the diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt index 3df3620ce4..cb26dd27c4 100644 --- a/docs/specs/acpi_mem_hotplug.txt +++ b/docs/specs/acpi_mem_hotplug.txt @@ -4,6 +4,9 @@ QEMU<->ACPI BIOS memory hotplug interface ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add and hot-remove events. +ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device +hot-add and hot-remove events. + Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): --------------------------------------------------------------- 0xa00: diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt index 0fdd251fc0..4aa5e3de29 100644 --- a/docs/specs/acpi_nvdimm.txt +++ b/docs/specs/acpi_nvdimm.txt @@ -127,6 +127,58 @@ _DSM process diagram: | result from the page | | | +--------------------------+ +--------------+ - _FIT implementation - ------------------- - TODO (will fill it when nvdimm hotplug is introduced) +Device Handle Reservation +------------------------- +As we mentioned above, byte 0 ~ byte 3 in the DSM memory save NVDIMM device +handle. The handle is completely QEMU internal thing, the values in range +[0, 0xFFFF] indicate nvdimm device (O means nvdimm root device named NVDR), +other values are reserved by other purpose. + +Current reserved handle: +0x10000 is reserved for QEMU internal DSM function called on the root +device. + +QEMU internal use only _DSM function +------------------------------------ +UUID, 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62, is reserved for QEMU internal +DSM function. + +There is the function introduced by QEMU and only used by QEMU internal. + +1) Read FIT + As we only reserved one page for NVDIMM ACPI it is impossible to map the + whole FIT data to guest's address space. This function is used by _FIT + method to read a piece of FIT data from QEMU. + + Input parameters: + Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} + Arg1 – Revision ID (set to 1) + Arg2 - Function Index, 0x1 + Arg3 - A package containing a buffer whose layout is as follows: + + +----------+-------------+-------------+-----------------------------------+ + | Filed | Byte Length | Byte Offset | Description | + +----------+-------------+-------------+-----------------------------------+ + | offset | 4 | 0 | the offset of FIT buffer | + +----------+-------------+-------------+-----------------------------------+ + + Output: + +----------+-------------+-------------+-----------------------------------+ + | Filed | Byte Length | Byte Offset | Description | + +----------+-------------+-------------+-----------------------------------+ + | | | | return status codes | + | | | | 0x100 indicates fit has been | + | status | 4 | 0 | updated | + | | | | other follows Chapter 3 in DSM | + | | | | Spec Rev1 | + +----------+-------------+-------------+-----------------------------------+ + | fit data | Varies | 4 | FIT data | + | | | | | + +----------+-------------+-------------+-----------------------------------+ + + The FIT offset is maintained by the caller itself, current offset plugs + the length returned by the function is the next offset we should read. + When all the FIT data has been read out, zero length is returned. + + If it returns 0x100, OSPM should restart to read FIT (read from offset 0 + again). @@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; - AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); section = address_space_translate_internal(d, addr, xlat, plen, false); @@ -1231,6 +1231,15 @@ void qemu_mutex_unlock_ramlist(void) } #ifdef __linux__ +static int64_t get_file_size(int fd) +{ + int64_t size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} + static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, const char *path, @@ -1242,6 +1251,7 @@ static void *file_ram_alloc(RAMBlock *block, char *c; void *area = MAP_FAILED; int fd = -1; + int64_t file_size; if (kvm_enabled() && !kvm_has_sync_mmu()) { error_setg(errp, @@ -1304,6 +1314,8 @@ static void *file_ram_alloc(RAMBlock *block, } #endif + file_size = get_file_size(fd); + if (memory < block->page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " "or larger than page size 0x%zx", @@ -1311,6 +1323,13 @@ static void *file_ram_alloc(RAMBlock *block, goto error; } + if (file_size > 0 && file_size < memory) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + path, file_size, memory); + goto error; + } + memory = ROUND_UP(memory, block->page_size); /* @@ -1318,8 +1337,16 @@ static void *file_ram_alloc(RAMBlock *block, * hosts, so don't bother bailing out on errors. * If anything goes wrong with it under other filesystems, * mmap will fail. + * + * Do not truncate the non-empty backend file to avoid corrupting + * the existing data in the file. Disabling shrinking is not + * enough. For example, the current vNVDIMM implementation stores + * the guest NVDIMM labels at the end of the backend file. If the + * backend file is later extended, QEMU will not be able to find + * those labels. Therefore, extending the non-empty backend file + * is disabled as well. */ - if (ftruncate(fd, memory)) { + if (!file_size && ftruncate(fd, memory)) { perror("ftruncate"); } @@ -2378,7 +2405,7 @@ static void tcg_commit(MemoryListener *listener) * may have split the RCU critical section. */ d = atomic_rcu_read(&cpuas->as->dispatch); - cpuas->memory_dispatch = d; + atomic_rcu_set(&cpuas->memory_dispatch, d); tlb_flush(cpuas->cpu, 1); } diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 4b7da6639f..489e63bb75 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -3,7 +3,7 @@ common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o -obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o +common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o common-obj-$(CONFIG_ACPI) += aml-build.o diff --git a/hw/acpi/ipmi.c b/hw/acpi/ipmi.c index 7e74ce4460..651e2e94ea 100644 --- a/hw/acpi/ipmi.c +++ b/hw/acpi/ipmi.c @@ -99,6 +99,7 @@ void build_acpi_ipmi_devices(Aml *scope, BusState *bus) ii = IPMI_INTERFACE(obj); iic = IPMI_INTERFACE_GET_CLASS(obj); + memset(&info, 0, sizeof(info)); iic->get_fwinfo(ii, &info); aml_append(scope, aml_ipmi_device(&info)); } diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index ec4e64b361..70f64517fd 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -2,6 +2,7 @@ #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/pc-hotplug.h" #include "hw/mem/pc-dimm.h" +#include "hw/mem/nvdimm.h" #include "hw/boards.h" #include "hw/qdev-core.h" #include "trace.h" @@ -232,11 +233,8 @@ void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, DeviceState *dev, Error **errp) { MemStatus *mdev; - DeviceClass *dc = DEVICE_GET_CLASS(dev); - - if (!dc->hotpluggable) { - return; - } + AcpiEventStatusBits event; + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); mdev = acpi_memory_slot_status(mem_st, dev, errp); if (!mdev) { @@ -244,10 +242,23 @@ void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, } mdev->dimm = dev; - mdev->is_enabled = true; + + /* + * do not set is_enabled and is_inserting if the slot is plugged with + * a nvdimm device to stop OSPM inquires memory region from the slot. + */ + if (is_nvdimm) { + event = ACPI_NVDIMM_HOTPLUG_STATUS; + } else { + mdev->is_enabled = true; + event = ACPI_MEMORY_HOTPLUG_STATUS; + } + if (dev->hotplugged) { - mdev->is_inserting = true; - acpi_send_event(DEVICE(hotplug_dev), ACPI_MEMORY_HOTPLUG_STATUS); + if (!is_nvdimm) { + mdev->is_inserting = true; + } + acpi_send_event(DEVICE(hotplug_dev), event); } } @@ -262,6 +273,8 @@ void acpi_memory_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + /* nvdimm device hot unplug is not supported yet. */ + assert(!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)); mdev->is_removing = true; acpi_send_event(DEVICE(hotplug_dev), ACPI_MEMORY_HOTPLUG_STATUS); } @@ -276,6 +289,8 @@ void acpi_memory_unplug_cb(MemHotplugState *mem_st, return; } + /* nvdimm device hot unplug is not supported yet. */ + assert(!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)); mdev->is_enabled = false; mdev->dimm = NULL; } diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index e486128aa1..602ec54485 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -289,8 +289,6 @@ static void nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev) { NvdimmNfitMemDev *nfit_memdev; - uint64_t addr = object_property_get_int(OBJECT(dev), PC_DIMM_ADDR_PROP, - NULL); uint64_t size = object_property_get_int(OBJECT(dev), PC_DIMM_SIZE_PROP, NULL); int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, @@ -314,7 +312,8 @@ nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev) /* The memory region on the device. */ nfit_memdev->region_len = cpu_to_le64(size); - nfit_memdev->region_dpa = cpu_to_le64(addr); + /* The device address starts from 0. */ + nfit_memdev->region_dpa = cpu_to_le64(0); /* Only one interleave for PMEM. */ nfit_memdev->interleave_ways = cpu_to_le16(1); @@ -349,8 +348,9 @@ static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev) (DSM) in DSM Spec Rev1.*/); } -static GArray *nvdimm_build_device_structure(GSList *device_list) +static GArray *nvdimm_build_device_structure(void) { + GSList *device_list = nvdimm_get_plugged_device_list(); GArray *structures = g_array_new(false, true /* clear */, 1); for (; device_list; device_list = device_list->next) { @@ -368,28 +368,58 @@ static GArray *nvdimm_build_device_structure(GSList *device_list) /* build NVDIMM Control Region Structure. */ nvdimm_build_structure_dcr(structures, dev); } + g_slist_free(device_list); return structures; } -static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets, +static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf) +{ + qemu_mutex_init(&fit_buf->lock); + fit_buf->fit = g_array_new(false, true /* clear */, 1); +} + +static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf) +{ + qemu_mutex_lock(&fit_buf->lock); + g_array_free(fit_buf->fit, true); + fit_buf->fit = nvdimm_build_device_structure(); + fit_buf->dirty = true; + qemu_mutex_unlock(&fit_buf->lock); +} + +void nvdimm_acpi_hotplug(AcpiNVDIMMState *state) +{ + nvdimm_build_fit_buffer(&state->fit_buf); +} + +static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets, GArray *table_data, BIOSLinker *linker) { - GArray *structures = nvdimm_build_device_structure(device_list); + NvdimmFitBuffer *fit_buf = &state->fit_buf; unsigned int header; + qemu_mutex_lock(&fit_buf->lock); + + /* NVDIMM device is not plugged? */ + if (!fit_buf->fit->len) { + goto exit; + } + acpi_add_table(table_offsets, table_data); /* NFIT header. */ header = table_data->len; acpi_data_push(table_data, sizeof(NvdimmNfitHeader)); /* NVDIMM device structures. */ - g_array_append_vals(table_data, structures->data, structures->len); + g_array_append_vals(table_data, fit_buf->fit->data, fit_buf->fit->len); build_header(linker, table_data, (void *)(table_data->data + header), "NFIT", - sizeof(NvdimmNfitHeader) + structures->len, 1, NULL, NULL); - g_array_free(structures, true); + sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, NULL, NULL); + +exit: + qemu_mutex_unlock(&fit_buf->lock); } struct NvdimmDsmIn { @@ -466,6 +496,22 @@ typedef struct NvdimmFuncSetLabelDataIn NvdimmFuncSetLabelDataIn; QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncSetLabelDataIn) + offsetof(NvdimmDsmIn, arg3) > 4096); +struct NvdimmFuncReadFITIn { + uint32_t offset; /* the offset of FIT buffer. */ +} QEMU_PACKED; +typedef struct NvdimmFuncReadFITIn NvdimmFuncReadFITIn; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITIn) + + offsetof(NvdimmDsmIn, arg3) > 4096); + +struct NvdimmFuncReadFITOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t func_ret_status; /* return status code. */ + uint8_t fit[0]; /* the FIT data. */ +} QEMU_PACKED; +typedef struct NvdimmFuncReadFITOut NvdimmFuncReadFITOut; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITOut) > 4096); + static void nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr) { @@ -486,6 +532,74 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr) cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out)); } +#define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000 + +/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */ +static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in, + hwaddr dsm_mem_addr) +{ + NvdimmFitBuffer *fit_buf = &state->fit_buf; + NvdimmFuncReadFITIn *read_fit; + NvdimmFuncReadFITOut *read_fit_out; + GArray *fit; + uint32_t read_len = 0, func_ret_status; + int size; + + read_fit = (NvdimmFuncReadFITIn *)in->arg3; + le32_to_cpus(&read_fit->offset); + + qemu_mutex_lock(&fit_buf->lock); + fit = fit_buf->fit; + + nvdimm_debug("Read FIT: offset %#x FIT size %#x Dirty %s.\n", + read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No"); + + if (read_fit->offset > fit->len) { + func_ret_status = 3 /* Invalid Input Parameters */; + goto exit; + } + + /* It is the first time to read FIT. */ + if (!read_fit->offset) { + fit_buf->dirty = false; + } else if (fit_buf->dirty) { /* FIT has been changed during RFIT. */ + func_ret_status = 0x100 /* fit changed */; + goto exit; + } + + func_ret_status = 0 /* Success */; + read_len = MIN(fit->len - read_fit->offset, + 4096 - sizeof(NvdimmFuncReadFITOut)); + +exit: + size = sizeof(NvdimmFuncReadFITOut) + read_len; + read_fit_out = g_malloc(size); + + read_fit_out->len = cpu_to_le32(size); + read_fit_out->func_ret_status = cpu_to_le32(func_ret_status); + memcpy(read_fit_out->fit, fit->data + read_fit->offset, read_len); + + cpu_physical_memory_write(dsm_mem_addr, read_fit_out, size); + + g_free(read_fit_out); + qemu_mutex_unlock(&fit_buf->lock); +} + +static void nvdimm_dsm_reserved_root(AcpiNVDIMMState *state, NvdimmDsmIn *in, + hwaddr dsm_mem_addr) +{ + switch (in->function) { + case 0x0: + nvdimm_dsm_function0(0x1 | 1 << 1 /* Read FIT */, dsm_mem_addr); + return; + case 0x1 /*Read FIT */: + nvdimm_dsm_func_read_fit(state, in, dsm_mem_addr); + return; + } + + nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); +} + static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) { /* @@ -643,8 +757,8 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, return; } - assert(sizeof(*in) + sizeof(*set_label_data) + set_label_data->length <= - 4096); + assert(offsetof(NvdimmDsmIn, arg3) + + sizeof(*set_label_data) + set_label_data->length <= 4096); nvc->write_label_data(nvdimm, set_label_data->in_buf, set_label_data->length, set_label_data->offset); @@ -712,6 +826,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) static void nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { + AcpiNVDIMMState *state = opaque; NvdimmDsmIn *in; hwaddr dsm_mem_addr = val; @@ -739,6 +854,11 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) goto exit; } + if (in->handle == NVDIMM_QEMU_RSVD_HANDLE_ROOT) { + nvdimm_dsm_reserved_root(state, in, dsm_mem_addr); + goto exit; + } + /* Handle 0 is reserved for NVDIMM Root Device. */ if (!in->handle) { nvdimm_dsm_root(in, dsm_mem_addr); @@ -772,23 +892,105 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, acpi_data_push(state->dsm_mem, sizeof(NvdimmDsmIn)); fw_cfg_add_file(fw_cfg, NVDIMM_DSM_MEM_FILE, state->dsm_mem->data, state->dsm_mem->len); + + nvdimm_init_fit_buffer(&state->fit_buf); } -#define NVDIMM_COMMON_DSM "NCAL" -#define NVDIMM_ACPI_MEM_ADDR "MEMA" +#define NVDIMM_COMMON_DSM "NCAL" +#define NVDIMM_ACPI_MEM_ADDR "MEMA" + +#define NVDIMM_DSM_MEMORY "NRAM" +#define NVDIMM_DSM_IOPORT "NPIO" + +#define NVDIMM_DSM_NOTIFY "NTFI" +#define NVDIMM_DSM_HANDLE "HDLE" +#define NVDIMM_DSM_REVISION "REVS" +#define NVDIMM_DSM_FUNCTION "FUNC" +#define NVDIMM_DSM_ARG3 "FARG" + +#define NVDIMM_DSM_OUT_BUF_SIZE "RLEN" +#define NVDIMM_DSM_OUT_BUF "ODAT" + +#define NVDIMM_DSM_RFIT_STATUS "RSTA" + +#define NVDIMM_QEMU_RSVD_UUID "648B9CF2-CDA1-4312-8AD9-49C4AF32BD62" static void nvdimm_build_common_dsm(Aml *dev) { - Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *result_size; + Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *elsectx2; Aml *elsectx, *unsupport, *unpatched, *expected_uuid, *uuid_invalid; - Aml *pckg, *pckg_index, *pckg_buf; + Aml *pckg, *pckg_index, *pckg_buf, *field, *dsm_out_buf, *dsm_out_buf_size; uint8_t byte_list[1]; method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); uuid = aml_arg(0); function = aml_arg(2); handle = aml_arg(4); - dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR); + dsm_mem = aml_local(6); + dsm_out_buf = aml_local(7); + + aml_append(method, aml_store(aml_name(NVDIMM_ACPI_MEM_ADDR), dsm_mem)); + + /* map DSM memory and IO into ACPI namespace. */ + aml_append(method, aml_operation_region(NVDIMM_DSM_IOPORT, AML_SYSTEM_IO, + aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); + aml_append(method, aml_operation_region(NVDIMM_DSM_MEMORY, + AML_SYSTEM_MEMORY, dsm_mem, sizeof(NvdimmDsmIn))); + + /* + * DSM notifier: + * NVDIMM_DSM_NOTIFY: write the address of DSM memory and notify QEMU to + * emulate the access. + * + * It is the IO port so that accessing them will cause VM-exit, the + * control will be transferred to QEMU. + */ + field = aml_field(NVDIMM_DSM_IOPORT, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_NOTIFY, + sizeof(uint32_t) * BITS_PER_BYTE)); + aml_append(method, field); + + /* + * DSM input: + * NVDIMM_DSM_HANDLE: store device's handle, it's zero if the _DSM call + * happens on NVDIMM Root Device. + * NVDIMM_DSM_REVISION: store the Arg1 of _DSM call. + * NVDIMM_DSM_FUNCTION: store the Arg2 of _DSM call. + * NVDIMM_DSM_ARG3: store the Arg3 of _DSM call which is a Package + * containing function-specific arguments. + * + * They are RAM mapping on host so that these accesses never cause + * VM-EXIT. + */ + field = aml_field(NVDIMM_DSM_MEMORY, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_HANDLE, + sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_REVISION, + sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_FUNCTION, + sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_ARG3, + (sizeof(NvdimmDsmIn) - offsetof(NvdimmDsmIn, arg3)) * BITS_PER_BYTE)); + aml_append(method, field); + + /* + * DSM output: + * NVDIMM_DSM_OUT_BUF_SIZE: the size of the buffer filled by QEMU. + * NVDIMM_DSM_OUT_BUF: the buffer QEMU uses to store the result. + * + * Since the page is reused by both input and out, the input data + * will be lost after storing new result into ODAT so we should fetch + * all the input data before writing the result. + */ + field = aml_field(NVDIMM_DSM_MEMORY, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_OUT_BUF_SIZE, + sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_OUT_BUF, + (sizeof(NvdimmDsmOut) - offsetof(NvdimmDsmOut, data)) * BITS_PER_BYTE)); + aml_append(method, field); /* * do not support any method if DSM memory address has not been @@ -804,9 +1006,15 @@ static void nvdimm_build_common_dsm(Aml *dev) /* UUID for NVDIMM Root Device */, expected_uuid)); aml_append(method, ifctx); elsectx = aml_else(); - aml_append(elsectx, aml_store( + ifctx = aml_if(aml_equal(handle, aml_int(NVDIMM_QEMU_RSVD_HANDLE_ROOT))); + aml_append(ifctx, aml_store(aml_touuid(NVDIMM_QEMU_RSVD_UUID + /* UUID for QEMU internal use */), expected_uuid)); + aml_append(elsectx, ifctx); + elsectx2 = aml_else(); + aml_append(elsectx2, aml_store( aml_touuid("4309AC30-0D11-11E4-9191-0800200C9A66") /* UUID for NVDIMM Devices */, expected_uuid)); + aml_append(elsectx, elsectx2); aml_append(method, elsectx); uuid_invalid = aml_lnot(aml_equal(uuid, expected_uuid)); @@ -832,9 +1040,9 @@ static void nvdimm_build_common_dsm(Aml *dev) * it reserves 0 for root device and is the handle for NVDIMM devices. * See the comments in nvdimm_slot_to_handle(). */ - aml_append(method, aml_store(handle, aml_name("HDLE"))); - aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); - aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); + aml_append(method, aml_store(handle, aml_name(NVDIMM_DSM_HANDLE))); + aml_append(method, aml_store(aml_arg(1), aml_name(NVDIMM_DSM_REVISION))); + aml_append(method, aml_store(aml_arg(2), aml_name(NVDIMM_DSM_FUNCTION))); /* * The fourth parameter (Arg3) of _DSM is a package which contains @@ -852,24 +1060,26 @@ static void nvdimm_build_common_dsm(Aml *dev) pckg_buf = aml_local(3); aml_append(ifctx, aml_store(aml_index(pckg, aml_int(0)), pckg_index)); aml_append(ifctx, aml_store(aml_derefof(pckg_index), pckg_buf)); - aml_append(ifctx, aml_store(pckg_buf, aml_name("ARG3"))); + aml_append(ifctx, aml_store(pckg_buf, aml_name(NVDIMM_DSM_ARG3))); aml_append(method, ifctx); /* * tell QEMU about the real address of DSM memory, then QEMU * gets the control and fills the result in DSM memory. */ - aml_append(method, aml_store(dsm_mem, aml_name("NTFI"))); - - result_size = aml_local(1); - aml_append(method, aml_store(aml_name("RLEN"), result_size)); - aml_append(method, aml_store(aml_shiftleft(result_size, aml_int(3)), - result_size)); - aml_append(method, aml_create_field(aml_name("ODAT"), aml_int(0), - result_size, "OBUF")); + aml_append(method, aml_store(dsm_mem, aml_name(NVDIMM_DSM_NOTIFY))); + + dsm_out_buf_size = aml_local(1); + /* RLEN is not included in the payload returned to guest. */ + aml_append(method, aml_subtract(aml_name(NVDIMM_DSM_OUT_BUF_SIZE), + aml_int(4), dsm_out_buf_size)); + aml_append(method, aml_store(aml_shiftleft(dsm_out_buf_size, aml_int(3)), + dsm_out_buf_size)); + aml_append(method, aml_create_field(aml_name(NVDIMM_DSM_OUT_BUF), + aml_int(0), dsm_out_buf_size, "OBUF")); aml_append(method, aml_concatenate(aml_buffer(0, NULL), aml_name("OBUF"), - aml_arg(6))); - aml_append(method, aml_return(aml_arg(6))); + dsm_out_buf)); + aml_append(method, aml_return(dsm_out_buf)); aml_append(dev, method); } @@ -884,12 +1094,110 @@ static void nvdimm_build_device_dsm(Aml *dev, uint32_t handle) aml_append(dev, method); } -static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) +static void nvdimm_build_fit(Aml *dev) { - for (; device_list; device_list = device_list->next) { - DeviceState *dev = device_list->data; - int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, - NULL); + Aml *method, *pkg, *buf, *buf_size, *offset, *call_result; + Aml *whilectx, *ifcond, *ifctx, *elsectx, *fit; + + buf = aml_local(0); + buf_size = aml_local(1); + fit = aml_local(2); + + aml_append(dev, aml_create_dword_field(aml_buffer(4, NULL), + aml_int(0), NVDIMM_DSM_RFIT_STATUS)); + + /* build helper function, RFIT. */ + method = aml_method("RFIT", 1, AML_SERIALIZED); + aml_append(method, aml_create_dword_field(aml_buffer(4, NULL), + aml_int(0), "OFST")); + + /* prepare input package. */ + pkg = aml_package(1); + aml_append(method, aml_store(aml_arg(0), aml_name("OFST"))); + aml_append(pkg, aml_name("OFST")); + + /* call Read_FIT function. */ + call_result = aml_call5(NVDIMM_COMMON_DSM, + aml_touuid(NVDIMM_QEMU_RSVD_UUID), + aml_int(1) /* Revision 1 */, + aml_int(0x1) /* Read FIT */, + pkg, aml_int(NVDIMM_QEMU_RSVD_HANDLE_ROOT)); + aml_append(method, aml_store(call_result, buf)); + + /* handle _DSM result. */ + aml_append(method, aml_create_dword_field(buf, + aml_int(0) /* offset at byte 0 */, "STAU")); + + aml_append(method, aml_store(aml_name("STAU"), + aml_name(NVDIMM_DSM_RFIT_STATUS))); + + /* if something is wrong during _DSM. */ + ifcond = aml_equal(aml_int(0 /* Success */), aml_name("STAU")); + ifctx = aml_if(aml_lnot(ifcond)); + aml_append(ifctx, aml_return(aml_buffer(0, NULL))); + aml_append(method, ifctx); + + aml_append(method, aml_store(aml_sizeof(buf), buf_size)); + aml_append(method, aml_subtract(buf_size, + aml_int(4) /* the size of "STAU" */, + buf_size)); + + /* if we read the end of fit. */ + ifctx = aml_if(aml_equal(buf_size, aml_int(0))); + aml_append(ifctx, aml_return(aml_buffer(0, NULL))); + aml_append(method, ifctx); + + aml_append(method, aml_store(aml_shiftleft(buf_size, aml_int(3)), + buf_size)); + aml_append(method, aml_create_field(buf, + aml_int(4 * BITS_PER_BYTE), /* offset at byte 4.*/ + buf_size, "BUFF")); + aml_append(method, aml_return(aml_name("BUFF"))); + aml_append(dev, method); + + /* build _FIT. */ + method = aml_method("_FIT", 0, AML_SERIALIZED); + offset = aml_local(3); + + aml_append(method, aml_store(aml_buffer(0, NULL), fit)); + aml_append(method, aml_store(aml_int(0), offset)); + + whilectx = aml_while(aml_int(1)); + aml_append(whilectx, aml_store(aml_call1("RFIT", offset), buf)); + aml_append(whilectx, aml_store(aml_sizeof(buf), buf_size)); + + /* + * if fit buffer was changed during RFIT, read from the beginning + * again. + */ + ifctx = aml_if(aml_equal(aml_name(NVDIMM_DSM_RFIT_STATUS), + aml_int(0x100 /* fit changed */))); + aml_append(ifctx, aml_store(aml_buffer(0, NULL), fit)); + aml_append(ifctx, aml_store(aml_int(0), offset)); + aml_append(whilectx, ifctx); + + elsectx = aml_else(); + + /* finish fit read if no data is read out. */ + ifctx = aml_if(aml_equal(buf_size, aml_int(0))); + aml_append(ifctx, aml_return(fit)); + aml_append(elsectx, ifctx); + + /* update the offset. */ + aml_append(elsectx, aml_add(offset, buf_size, offset)); + /* append the data we read out to the fit buffer. */ + aml_append(elsectx, aml_concatenate(fit, buf, fit)); + aml_append(whilectx, elsectx); + aml_append(method, whilectx); + + aml_append(dev, method); +} + +static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots) +{ + uint32_t slot; + + for (slot = 0; slot < ram_slots; slot++) { uint32_t handle = nvdimm_slot_to_handle(slot); Aml *nvdimm_dev; @@ -910,11 +1218,11 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) } } -static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, - GArray *table_data, BIOSLinker *linker, - GArray *dsm_dma_arrea) +static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, GArray *dsm_dma_arrea, + uint32_t ram_slots) { - Aml *ssdt, *sb_scope, *dev, *field; + Aml *ssdt, *sb_scope, *dev; int mem_addr_offset, nvdimm_ssdt; acpi_add_table(table_offsets, table_data); @@ -939,69 +1247,13 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, */ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012"))); - /* map DSM memory and IO into ACPI namespace. */ - aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO, - aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); - aml_append(dev, aml_operation_region("NRAM", AML_SYSTEM_MEMORY, - aml_name(NVDIMM_ACPI_MEM_ADDR), sizeof(NvdimmDsmIn))); - - /* - * DSM notifier: - * NTFI: write the address of DSM memory and notify QEMU to emulate - * the access. - * - * It is the IO port so that accessing them will cause VM-exit, the - * control will be transferred to QEMU. - */ - field = aml_field("NPIO", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("NTFI", - sizeof(uint32_t) * BITS_PER_BYTE)); - aml_append(dev, field); - - /* - * DSM input: - * HDLE: store device's handle, it's zero if the _DSM call happens - * on NVDIMM Root Device. - * REVS: store the Arg1 of _DSM call. - * FUNC: store the Arg2 of _DSM call. - * ARG3: store the Arg3 of _DSM call. - * - * They are RAM mapping on host so that these accesses never cause - * VM-EXIT. - */ - field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("HDLE", - sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("REVS", - sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("FUNC", - sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("ARG3", - (sizeof(NvdimmDsmIn) - offsetof(NvdimmDsmIn, arg3)) * BITS_PER_BYTE)); - aml_append(dev, field); - - /* - * DSM output: - * RLEN: the size of the buffer filled by QEMU. - * ODAT: the buffer QEMU uses to store the result. - * - * Since the page is reused by both input and out, the input data - * will be lost after storing new result into ODAT so we should fetch - * all the input data before writing the result. - */ - field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("RLEN", - sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("ODAT", - (sizeof(NvdimmDsmOut) - offsetof(NvdimmDsmOut, data)) * BITS_PER_BYTE)); - aml_append(dev, field); - nvdimm_build_common_dsm(dev); /* 0 is reserved for root device. */ nvdimm_build_device_dsm(dev, 0); + nvdimm_build_fit(dev); - nvdimm_build_nvdimm_devices(device_list, dev); + nvdimm_build_nvdimm_devices(dev, ram_slots); aml_append(sb_scope, dev); aml_append(ssdt, sb_scope); @@ -1026,17 +1278,17 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, } void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, GArray *dsm_dma_arrea) + BIOSLinker *linker, AcpiNVDIMMState *state, + uint32_t ram_slots) { - GSList *device_list; + nvdimm_build_nfit(state, table_offsets, table_data, linker); - /* no NVDIMM device is plugged. */ - device_list = nvdimm_get_plugged_device_list(); - if (!device_list) { - return; + /* + * NVDIMM device is allowed to be plugged only if there is available + * slot. + */ + if (ram_slots) { + nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem, + ram_slots); } - nvdimm_build_nfit(device_list, table_offsets, table_data, linker); - nvdimm_build_ssdt(device_list, table_offsets, table_data, linker, - dsm_dma_arrea); - g_slist_free(device_list); } diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 704a763603..90ef557c8c 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -88,23 +88,28 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, *dataplane = NULL; - if (!conf->iothread) { - return; - } + if (conf->iothread) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " + "(transport does not support notifiers)"); + return; + } + if (!virtio_device_ioeventfd_enabled(vdev)) { + error_setg(errp, "ioeventfd is required for iothread"); + return; + } - /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || !k->ioeventfd_started) { - error_setg(errp, - "device is incompatible with dataplane " - "(transport does not support notifiers)"); - return; + /* If dataplane is (re-)enabled while the guest is running there could + * be block jobs that can conflict. + */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + error_prepend(errp, "cannot start virtio-blk dataplane: "); + return; + } } - - /* If dataplane is (re-)enabled while the guest is running there could be - * block jobs that can conflict. - */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - error_prepend(errp, "cannot start dataplane thread: "); + /* Don't try if transport does not support notifiers. */ + if (!virtio_device_ioeventfd_enabled(vdev)) { return; } @@ -112,9 +117,13 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, s->vdev = vdev; s->conf = conf; - s->iothread = conf->iothread; - object_ref(OBJECT(s->iothread)); - s->ctx = iothread_get_aio_context(s->iothread); + if (conf->iothread) { + s->iothread = conf->iothread; + object_ref(OBJECT(s->iothread)); + s->ctx = iothread_get_aio_context(s->iothread); + } else { + s->ctx = qemu_get_aio_context(); + } s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); s->batch_notify_vqs = bitmap_new(conf->num_queues); @@ -124,14 +133,19 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, /* Context: QEMU global mutex held */ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) { + VirtIOBlock *vblk; + if (!s) { return; } - virtio_blk_data_plane_stop(s); + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); g_free(s->batch_notify_vqs); qemu_bh_delete(s->bh); - object_unref(OBJECT(s->iothread)); + if (s->iothread) { + object_unref(OBJECT(s->iothread)); + } g_free(s); } @@ -147,17 +161,18 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev, } /* Context: QEMU global mutex held */ -void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) +int virtio_blk_data_plane_start(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtIOBlock *vblk = VIRTIO_BLK(vdev); + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); unsigned i; unsigned nvqs = s->conf->num_queues; int r; if (vblk->dataplane_started || s->starting) { - return; + return 0; } s->starting = true; @@ -204,20 +219,22 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) virtio_blk_data_plane_handle_output); } aio_context_release(s->ctx); - return; + return 0; fail_guest_notifiers: vblk->dataplane_disabled = true; s->starting = false; vblk->dataplane_started = true; + return -ENOSYS; } /* Context: QEMU global mutex held */ -void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) +void virtio_blk_data_plane_stop(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtIOBlock *vblk = VIRTIO_BLK(vdev); + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); unsigned i; unsigned nvqs = s->conf->num_queues; diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h index b1f0b95b32..db3f47b173 100644 --- a/hw/block/dataplane/virtio-blk.h +++ b/hw/block/dataplane/virtio-blk.h @@ -23,9 +23,9 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, VirtIOBlockDataPlane **dataplane, Error **errp); void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s); void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); +int virtio_blk_data_plane_start(VirtIODevice *vdev); +void virtio_blk_data_plane_stop(VirtIODevice *vdev); + #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 37fe72bdcd..0c5fd27593 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -611,7 +611,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * dataplane here instead of waiting for .set_status(). */ - virtio_blk_data_plane_start(s->dataplane); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_disabled) { return; } @@ -687,11 +687,9 @@ static void virtio_blk_reset(VirtIODevice *vdev) virtio_blk_free_request(req); } - if (s->dataplane) { - virtio_blk_data_plane_stop(s->dataplane); - } aio_context_release(ctx); + assert(!s->dataplane_started); blk_set_enable_write_cache(s->blk, s->original_wce); } @@ -789,9 +787,8 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBlock *s = VIRTIO_BLK(vdev); - if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | - VIRTIO_CONFIG_S_DRIVER_OK))) { - virtio_blk_data_plane_stop(s->dataplane); + if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { + assert(!s->dataplane_started); } if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { @@ -919,7 +916,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output); + virtio_add_queue(vdev, 128, virtio_blk_handle_output); } virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); if (err != NULL) { @@ -1002,6 +999,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_blk_reset; vdc->save = virtio_blk_save_device; vdc->load = virtio_blk_load_device; + vdc->start_ioeventfd = virtio_blk_data_plane_start; + vdc->stop_ioeventfd = virtio_blk_data_plane_stop; } static const TypeInfo virtio_blk_info = { diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c index 17ac986685..ab34c19461 100644 --- a/hw/core/hotplug.c +++ b/hw/core/hotplug.c @@ -35,6 +35,17 @@ void hotplug_handler_plug(HotplugHandler *plug_handler, } } +void hotplug_handler_post_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp) +{ + HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler); + + if (hdc->post_plug) { + hdc->post_plug(plug_handler, plugged_dev, errp); + } +} + void hotplug_handler_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 57834423b9..d835e6259a 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -945,10 +945,21 @@ static void device_set_realized(Object *obj, bool value, Error **errp) goto child_realize_fail; } } + if (dev->hotplugged) { device_reset(dev); } dev->pending_deleted_event = false; + dev->realized = value; + + if (hotplug_ctrl) { + hotplug_handler_post_plug(hotplug_ctrl, dev, &local_err); + } + + if (local_err != NULL) { + dev->realized = value; + goto post_realize_fail; + } } else if (!value && dev->realized) { Error **local_errp = NULL; QLIST_FOREACH(bus, &dev->child_bus, sibling) { @@ -965,13 +976,14 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } dev->pending_deleted_event = true; DEVICE_LISTENER_CALL(unrealize, Reverse, dev); - } - if (local_err != NULL) { - goto fail; + if (local_err != NULL) { + goto fail; + } + + dev->realized = value; } - dev->realized = value; return; child_realize_fail: diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 13cbbde712..c02f408ab2 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2068,6 +2068,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, method = aml_method("_E03", 0, AML_NOTSERIALIZED); aml_append(method, aml_call0(MEMORY_HOTPLUG_HANDLER_PATH)); aml_append(scope, method); + + if (pcms->acpi_nvdimm_state.is_enabled) { + method = aml_method("_E04", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); + aml_append(scope, method); + } } aml_append(dsdt, scope); @@ -2810,7 +2817,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) } if (pcms->acpi_nvdimm_state.is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, - pcms->acpi_nvdimm_state.dsm_mem); + &pcms->acpi_nvdimm_state, machine->ram_slots); } /* Add tables supplied by user (if any) */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fbd9aed72b..2c37a78c7a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1721,6 +1721,16 @@ out: error_propagate(errp, local_err); } +static void pc_dimm_post_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + nvdimm_acpi_hotplug(&pcms->acpi_nvdimm_state); + } +} + static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -1734,6 +1744,12 @@ static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev, goto out; } + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(&local_err, + "nvdimm device hot unplug is not supported yet."); + goto out; + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); @@ -1751,6 +1767,12 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, HotplugHandlerClass *hhc; Error *local_err = NULL; + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(&local_err, + "nvdimm device hot unplug is not supported yet."); + goto out; + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); @@ -1986,6 +2008,14 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, } } +static void pc_machine_device_post_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_post_plug(hotplug_dev, dev, errp); + } +} + static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2292,6 +2322,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->reset = pc_machine_reset; hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; + hc->post_plug = pc_machine_device_post_plug_cb; hc->unplug_request = pc_machine_device_unplug_request_cb; hc->unplug = pc_machine_device_unplug_cb; nc->nmi_monitor_handler = x86_nmi; diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs index a90318d5ba..1b422bbee0 100644 --- a/hw/ipmi/Makefile.objs +++ b/hw/ipmi/Makefile.objs @@ -1,5 +1,5 @@ common-obj-$(CONFIG_IPMI) += ipmi.o common-obj-$(CONFIG_IPMI_LOCAL) += ipmi_bmc_sim.o -common-obj-$(CONFIG_IPMI_LOCAL) += ipmi_bmc_extern.o +common-obj-$(CONFIG_IPMI_EXTERN) += ipmi_bmc_extern.o common-obj-$(CONFIG_ISA_IPMI_KCS) += isa_ipmi_kcs.o common-obj-$(CONFIG_ISA_IPMI_BT) += isa_ipmi_bt.o diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c index f09f217e78..5cf1caa88a 100644 --- a/hw/ipmi/ipmi.c +++ b/hw/ipmi/ipmi.c @@ -51,7 +51,7 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) if (checkonly) { return 0; } - qemu_system_powerdown_request(); + qemu_system_shutdown_request(); return 0; case IPMI_SEND_NMI: @@ -61,9 +61,15 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) qmp_inject_nmi(NULL); return 0; + case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP: + if (checkonly) { + return 0; + } + qemu_system_powerdown_request(); + return 0; + case IPMI_POWERCYCLE_CHASSIS: case IPMI_PULSE_DIAG_IRQ: - case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP: case IPMI_POWERON_CHASSIS: default: return IPMI_CC_COMMAND_NOT_SUPPORTED; diff --git a/hw/ipmi/ipmi_bmc_extern.c b/hw/ipmi/ipmi_bmc_extern.c index 4b310e5eff..e8e3d250b6 100644 --- a/hw/ipmi/ipmi_bmc_extern.c +++ b/hw/ipmi/ipmi_bmc_extern.c @@ -54,7 +54,8 @@ #define VM_CAPABILITIES_IRQ 0x04 #define VM_CAPABILITIES_NMI 0x08 #define VM_CAPABILITIES_ATTN 0x10 -#define VM_CMD_FORCEOFF 0x09 +#define VM_CAPABILITIES_GRACEFUL_SHUTDOWN 0x20 +#define VM_CMD_GRACEFUL_SHUTDOWN 0x09 #define TYPE_IPMI_BMC_EXTERN "ipmi-bmc-extern" #define IPMI_BMC_EXTERN(obj) OBJECT_CHECK(IPMIBmcExtern, (obj), \ @@ -276,8 +277,8 @@ static void handle_hw_op(IPMIBmcExtern *ibe, unsigned char hw_op) k->do_hw_op(s, IPMI_SEND_NMI, 0); break; - case VM_CMD_FORCEOFF: - qemu_system_shutdown_request(); + case VM_CMD_GRACEFUL_SHUTDOWN: + k->do_hw_op(s, IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 0); break; } } @@ -401,6 +402,10 @@ static void chr_event(void *opaque, int event) if (k->do_hw_op(ibe->parent.intf, IPMI_POWEROFF_CHASSIS, 1) == 0) { v |= VM_CAPABILITIES_POWER; } + if (k->do_hw_op(ibe->parent.intf, IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 1) + == 0) { + v |= VM_CAPABILITIES_GRACEFUL_SHUTDOWN; + } if (k->do_hw_op(ibe->parent.intf, IPMI_RESET_CHASSIS, 1) == 0) { v |= VM_CAPABILITIES_RESET; } @@ -512,6 +517,7 @@ static void ipmi_bmc_extern_class_init(ObjectClass *oc, void *data) bk->handle_command = ipmi_bmc_extern_handle_command; bk->handle_reset = ipmi_bmc_extern_handle_reset; + dc->hotpluggable = false; dc->realize = ipmi_bmc_extern_realize; dc->props = ipmi_bmc_extern_properties; } diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c index 17c7c0ea07..c7883d6f5e 100644 --- a/hw/ipmi/ipmi_bmc_sim.c +++ b/hw/ipmi/ipmi_bmc_sim.c @@ -217,7 +217,6 @@ struct IPMIBmcSim { /* Odd netfns are for responses, so we only need the even ones. */ const IPMINetfn *netfns[MAX_NETFNS / 2]; - QemuMutex lock; /* We allow one event in the buffer */ uint8_t evtbuf[16]; @@ -940,7 +939,6 @@ static void get_msg(IPMIBmcSim *ibs, { IPMIRcvBufEntry *msg; - qemu_mutex_lock(&ibs->lock); if (QTAILQ_EMPTY(&ibs->rcvbufs)) { rsp_buffer_set_error(rsp, 0x80); /* Queue empty */ goto out; @@ -960,7 +958,6 @@ static void get_msg(IPMIBmcSim *ibs, } out: - qemu_mutex_unlock(&ibs->lock); return; } @@ -1055,11 +1052,9 @@ static void send_msg(IPMIBmcSim *ibs, end_msg: msg->buf[msg->len] = ipmb_checksum(msg->buf, msg->len, 0); msg->len++; - qemu_mutex_lock(&ibs->lock); QTAILQ_INSERT_TAIL(&ibs->rcvbufs, msg, entry); ibs->msg_flags |= IPMI_BMC_MSG_FLAG_RCV_MSG_QUEUE; k->set_atn(s, 1, attn_irq_enabled(ibs)); - qemu_mutex_unlock(&ibs->lock); } static void do_watchdog_reset(IPMIBmcSim *ibs) @@ -1753,7 +1748,6 @@ static void ipmi_sim_realize(DeviceState *dev, Error **errp) unsigned int i; IPMIBmcSim *ibs = IPMI_BMC_SIMULATOR(b); - qemu_mutex_init(&ibs->lock); QTAILQ_INIT(&ibs->rcvbufs); ibs->bmc_global_enables = (1 << IPMI_BMC_EVENT_LOG_BIT); @@ -1791,6 +1785,7 @@ static void ipmi_sim_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); IPMIBmcClass *bk = IPMI_BMC_CLASS(oc); + dc->hotpluggable = false; dc->realize = ipmi_sim_realize; bk->handle_command = ipmi_sim_handle_command; } diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 7895805a23..db896b0bb6 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -148,13 +148,9 @@ static MemoryRegion *nvdimm_get_vmstate_memory_region(PCDIMMDevice *dimm) static void nvdimm_class_init(ObjectClass *oc, void *data) { - DeviceClass *dc = DEVICE_CLASS(oc); PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); NVDIMMClass *nvc = NVDIMM_CLASS(oc); - /* nvdimm hotplug has not been supported yet. */ - dc->hotpluggable = false; - ddc->realize = nvdimm_realize; ddc->get_memory_region = nvdimm_get_memory_region; ddc->get_vmstate_memory_region = nvdimm_get_vmstate_memory_region; diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index ee136ab940..7d7f8f6e19 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -59,38 +59,11 @@ static void virtio_ccw_stop_ioeventfd(VirtioCcwDevice *dev) virtio_bus_stop_ioeventfd(&dev->bus); } -static bool virtio_ccw_ioeventfd_started(DeviceState *d) +static bool virtio_ccw_ioeventfd_enabled(DeviceState *d) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - return dev->ioeventfd_started; -} - -static void virtio_ccw_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - dev->ioeventfd_started = started; - if (err) { - /* Disable ioeventfd for this device. */ - dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; - } -} - -static bool virtio_ccw_ioeventfd_disabled(DeviceState *d) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - return dev->ioeventfd_disabled || - !(dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD); -} - -static void virtio_ccw_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - dev->ioeventfd_disabled = disabled; + return (dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD) != 0; } static int virtio_ccw_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, @@ -709,6 +682,10 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->cssid, sch->ssid, sch->schid, sch->devno, ccw_dev->bus_id.valid ? "user-configured" : "auto-configured"); + if (!kvm_eventfds_enabled()) { + dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; + } + if (k->realize) { k->realize(dev, &err); } @@ -1311,10 +1288,6 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) return; } - if (!kvm_eventfds_enabled()) { - dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; - } - sch->id.cu_model = virtio_bus_get_vdev_id(&dev->bus); @@ -1616,10 +1589,7 @@ static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) k->pre_plugged = virtio_ccw_pre_plugged; k->device_plugged = virtio_ccw_device_plugged; k->device_unplugged = virtio_ccw_device_unplugged; - k->ioeventfd_started = virtio_ccw_ioeventfd_started; - k->ioeventfd_set_started = virtio_ccw_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_ccw_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_ccw_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_ccw_ioeventfd_enabled; k->ioeventfd_assign = virtio_ccw_ioeventfd_assign; } diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h index 565094e4fb..77d10f1671 100644 --- a/hw/s390x/virtio-ccw.h +++ b/hw/s390x/virtio-ccw.h @@ -86,8 +86,6 @@ struct VirtioCcwDevice { int revision; uint32_t max_rev; VirtioBusState bus; - bool ioeventfd_started; - bool ioeventfd_disabled; uint32_t flags; uint8_t thinint_isc; AdapterRoutes routes; diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 9424f0e057..f2ea29dbc3 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "hw/virtio/virtio-scsi.h" #include "qemu/error-report.h" #include "sysemu/block-backend.h" @@ -21,20 +22,30 @@ #include "hw/virtio/virtio-access.h" /* Context: QEMU global mutex held */ -void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread) +void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - assert(!s->ctx); - s->ctx = iothread_get_aio_context(vs->conf.iothread); - - /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || !k->ioeventfd_started) { - fprintf(stderr, "virtio-scsi: Failed to set iothread " - "(transport does not support notifiers)"); - exit(1); + if (vs->conf.iothread) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " + "(transport does not support notifiers)"); + return; + } + if (!virtio_device_ioeventfd_enabled(vdev)) { + error_setg(errp, "ioeventfd is required for iothread"); + return; + } + s->ctx = iothread_get_aio_context(vs->conf.iothread); + } else { + if (!virtio_device_ioeventfd_enabled(vdev)) { + return; + } + s->ctx = qemu_get_aio_context(); } } @@ -105,19 +116,19 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s) } /* Context: QEMU global mutex held */ -void virtio_scsi_dataplane_start(VirtIOSCSI *s) +int virtio_scsi_dataplane_start(VirtIODevice *vdev) { int i; int rc; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); if (s->dataplane_started || s->dataplane_starting || - s->dataplane_fenced || - s->ctx != iothread_get_aio_context(vs->conf.iothread)) { - return; + s->dataplane_fenced) { + return 0; } s->dataplane_starting = true; @@ -152,7 +163,7 @@ void virtio_scsi_dataplane_start(VirtIOSCSI *s) s->dataplane_starting = false; s->dataplane_started = true; aio_context_release(s->ctx); - return; + return 0; fail_vrings: virtio_scsi_clear_aio(s); @@ -165,14 +176,16 @@ fail_guest_notifiers: s->dataplane_fenced = true; s->dataplane_starting = false; s->dataplane_started = true; + return -ENOSYS; } /* Context: QEMU global mutex held */ -void virtio_scsi_dataplane_stop(VirtIOSCSI *s) +void virtio_scsi_dataplane_stop(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); int i; if (!s->dataplane_started || s->dataplane_stopping) { @@ -186,7 +199,6 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) return; } s->dataplane_stopping = true; - assert(s->ctx == iothread_get_aio_context(vs->conf.iothread)); aio_context_acquire(s->ctx); virtio_scsi_clear_aio(s); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 4762f05274..3e5ae6ac0f 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -434,7 +434,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = (VirtIOSCSI *)vdev; if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -610,7 +610,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = (VirtIOSCSI *)vdev; if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -669,9 +669,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev) VirtIOSCSI *s = VIRTIO_SCSI(vdev); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); - if (s->ctx) { - virtio_scsi_dataplane_stop(s); - } + assert(!s->dataplane_started); s->resetting++; qbus_reset_all(&s->bus.qbus); s->resetting--; @@ -749,7 +747,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = VIRTIO_SCSI(vdev); if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -848,14 +846,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp, s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; - s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); - s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt); + s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); + s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, evt); for (i = 0; i < s->conf.num_queues; i++) { - s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); - } - - if (s->conf.iothread) { - virtio_scsi_set_iothread(VIRTIO_SCSI(s), s->conf.iothread); + s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); } } @@ -885,6 +879,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } } + + virtio_scsi_dataplane_setup(s, errp); } static void virtio_scsi_instance_init(Object *obj) @@ -957,6 +953,8 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->set_config = virtio_scsi_set_config; vdc->get_features = virtio_scsi_get_features; vdc->reset = virtio_scsi_reset; + vdc->start_ioeventfd = virtio_scsi_dataplane_start; + vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; hc->plug = virtio_scsi_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index e71630812e..95c4c30ea1 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -7,3 +7,5 @@ obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o +obj-y += virtio-crypto.o +obj-$(CONFIG_VIRTIO_PCI) += virtio-crypto-pci.o diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bd051ab2e1..131f1643b2 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1190,12 +1190,13 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; - if (!k->ioeventfd_started) { + if (!k->ioeventfd_assign) { error_report("binding does not support host notifiers"); r = -ENOSYS; goto fail; } + virtio_device_stop_ioeventfd(vdev); for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); @@ -1215,6 +1216,7 @@ fail_vq: } assert (e >= 0); } + virtio_device_start_ioeventfd(vdev); fail: return r; } @@ -1237,6 +1239,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } assert (r >= 0); } + virtio_device_start_ioeventfd(vdev); } /* Test and clear event pending status. diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 1d77028236..cfba053280 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -394,21 +394,9 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target) trace_virtio_balloon_to_target(target, dev->num_pages); } -static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) +static int virtio_balloon_post_load_device(void *opaque, int version_id) { - VirtIOBalloon *s = VIRTIO_BALLOON(vdev); - - qemu_put_be32(f, s->num_pages); - qemu_put_be32(f, s->actual); -} - -static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, - int version_id) -{ - VirtIOBalloon *s = VIRTIO_BALLOON(vdev); - - s->num_pages = qemu_get_be32(f); - s->actual = qemu_get_be32(f); + VirtIOBalloon *s = VIRTIO_BALLOON(opaque); if (balloon_stats_enabled(s)) { balloon_stats_change_timer(s, s->stats_poll_interval); @@ -416,6 +404,18 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static const VMStateDescription vmstate_virtio_balloon_device = { + .name = "virtio-balloon-device", + .version_id = 1, + .minimum_version_id = 1, + .post_load = virtio_balloon_post_load_device, + .fields = (VMStateField[]) { + VMSTATE_UINT32(num_pages, VirtIOBalloon), + VMSTATE_UINT32(actual, VirtIOBalloon), + VMSTATE_END_OF_LIST() + }, +}; + static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -517,9 +517,8 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) vdc->get_config = virtio_balloon_get_config; vdc->set_config = virtio_balloon_set_config; vdc->get_features = virtio_balloon_get_features; - vdc->save = virtio_balloon_save_device; - vdc->load = virtio_balloon_load_device; vdc->set_status = virtio_balloon_set_status; + vdc->vmsd = &vmstate_virtio_balloon_device; } static const TypeInfo virtio_balloon_info = { diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 11f65bd225..bf61f66a04 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -147,131 +147,97 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config) } } -/* - * This function handles both assigning the ioeventfd handler and - * registering it with the kernel. - * assign: register/deregister ioeventfd with the kernel - * set_handler: use the generic ioeventfd handler - */ -static int set_host_notifier_internal(DeviceState *proxy, VirtioBusState *bus, - int n, bool assign, bool set_handler) +int virtio_bus_start_ioeventfd(VirtioBusState *bus) { - VirtIODevice *vdev = virtio_bus_get_device(bus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); - VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - int r = 0; + DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtIODevice *vdev = virtio_bus_get_device(bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + int r; - if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - error_report("%s: unable to init event notifier: %s (%d)", - __func__, strerror(-r), r); - return r; - } - virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); - r = k->ioeventfd_assign(proxy, notifier, n, assign); - if (r < 0) { - error_report("%s: unable to assign ioeventfd: %d", __func__, r); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); - return r; - } - } else { - k->ioeventfd_assign(proxy, notifier, n, assign); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); + if (!k->ioeventfd_assign || !k->ioeventfd_enabled(proxy)) { + return -ENOSYS; } - return r; + if (bus->ioeventfd_started) { + return 0; + } + r = vdc->start_ioeventfd(vdev); + if (r < 0) { + error_report("%s: failed. Fallback to userspace (slower).", __func__); + return r; + } + bus->ioeventfd_started = true; + return 0; } -void virtio_bus_start_ioeventfd(VirtioBusState *bus) +void virtio_bus_stop_ioeventfd(VirtioBusState *bus) { - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); - DeviceState *proxy = DEVICE(BUS(bus)->parent); VirtIODevice *vdev; - int n, r; + VirtioDeviceClass *vdc; - if (!k->ioeventfd_started || k->ioeventfd_started(proxy)) { + if (!bus->ioeventfd_started) { return; } - if (k->ioeventfd_disabled(proxy)) { - return; - } - vdev = virtio_bus_get_device(bus); - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = set_host_notifier_internal(proxy, bus, n, true, true); - if (r < 0) { - goto assign_error; - } - } - k->ioeventfd_set_started(proxy, true, false); - return; - -assign_error: - while (--n >= 0) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = set_host_notifier_internal(proxy, bus, n, false, false); - assert(r >= 0); - } - k->ioeventfd_set_started(proxy, false, true); - error_report("%s: failed. Fallback to userspace (slower).", __func__); + vdev = virtio_bus_get_device(bus); + vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + vdc->stop_ioeventfd(vdev); + bus->ioeventfd_started = false; } -void virtio_bus_stop_ioeventfd(VirtioBusState *bus) +bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus) { VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); DeviceState *proxy = DEVICE(BUS(bus)->parent); - VirtIODevice *vdev; - int n, r; - if (!k->ioeventfd_started || !k->ioeventfd_started(proxy)) { - return; - } - vdev = virtio_bus_get_device(bus); - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = set_host_notifier_internal(proxy, bus, n, false, false); - assert(r >= 0); - } - k->ioeventfd_set_started(proxy, false, false); + return k->ioeventfd_assign && k->ioeventfd_enabled(proxy); } /* - * This function switches from/to the generic ioeventfd handler. - * assign==false means 'use generic ioeventfd handler'. + * This function switches ioeventfd on/off in the device. + * The caller must set or clear the handlers for the EventNotifier. */ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) { + VirtIODevice *vdev = virtio_bus_get_device(bus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r = 0; - if (!k->ioeventfd_started) { + if (!k->ioeventfd_assign) { return -ENOSYS; } - k->ioeventfd_set_disabled(proxy, assign); + if (assign) { - /* - * Stop using the generic ioeventfd, we are doing eventfd handling - * ourselves below - * - * FIXME: We should just switch the handler and not deassign the - * ioeventfd. - * Otherwise, there's a window where we don't have an - * ioeventfd and we may end up with a notification where - * we don't expect one. - */ - virtio_bus_stop_ioeventfd(bus); + assert(!bus->ioeventfd_started); + r = event_notifier_init(notifier, 1); + if (r < 0) { + error_report("%s: unable to init event notifier: %s (%d)", + __func__, strerror(-r), r); + return r; + } + r = k->ioeventfd_assign(proxy, notifier, n, true); + if (r < 0) { + error_report("%s: unable to assign ioeventfd: %d", __func__, r); + goto cleanup_event_notifier; + } + return 0; + } else { + if (!bus->ioeventfd_started) { + return 0; + } + k->ioeventfd_assign(proxy, notifier, n, false); } - return set_host_notifier_internal(proxy, bus, n, assign, false); + +cleanup_event_notifier: + /* Test and clear notifier after disabling event, + * in case poll callback didn't have time to run. + */ + virtio_queue_host_notifier_read(notifier); + event_notifier_cleanup(notifier); + return r; } static char *virtio_bus_get_dev_path(DeviceState *dev) diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c new file mode 100644 index 0000000000..21d998401a --- /dev/null +++ b/hw/virtio/virtio-crypto-pci.c @@ -0,0 +1,77 @@ +/* + * Virtio crypto device + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-crypto.h" +#include "qapi/error.h" + +static Property virtio_crypto_pci_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIOCryptoPCI *vcrypto = VIRTIO_CRYPTO_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&vcrypto->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + virtio_pci_force_virtio_1(vpci_dev); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); + object_property_set_link(OBJECT(vcrypto), + OBJECT(vcrypto->vdev.conf.cryptodev), "cryptodev", + NULL); +} + +static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + k->realize = virtio_crypto_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = virtio_crypto_pci_properties; + + pcidev_k->class_id = PCI_CLASS_OTHERS; +} + +static void virtio_crypto_initfn(Object *obj) +{ + VirtIOCryptoPCI *dev = VIRTIO_CRYPTO_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_CRYPTO); + object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev), + "cryptodev", &error_abort); +} + +static const TypeInfo virtio_crypto_pci_info = { + .name = TYPE_VIRTIO_CRYPTO_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VirtIOCryptoPCI), + .instance_init = virtio_crypto_initfn, + .class_init = virtio_crypto_pci_class_init, +}; + +static void virtio_crypto_pci_register_types(void) +{ + type_register_static(&virtio_crypto_pci_info); +} +type_init(virtio_crypto_pci_register_types) diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c new file mode 100644 index 0000000000..170114f52b --- /dev/null +++ b/hw/virtio/virtio-crypto.c @@ -0,0 +1,898 @@ +/* + * Virtio crypto Support + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "hw/qdev.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-crypto.h" +#include "hw/virtio/virtio-access.h" +#include "standard-headers/linux/virtio_ids.h" + +#define VIRTIO_CRYPTO_VM_VERSION 1 + +/* + * Transfer virtqueue index to crypto queue index. + * The control virtqueue is after the data virtqueues + * so the input value doesn't need to be adjusted + */ +static inline int virtio_crypto_vq2q(int queue_index) +{ + return queue_index; +} + +static int +virtio_crypto_cipher_session_helper(VirtIODevice *vdev, + CryptoDevBackendSymSessionInfo *info, + struct virtio_crypto_cipher_session_para *cipher_para, + struct iovec **iov, unsigned int *out_num) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + unsigned int num = *out_num; + + info->cipher_alg = ldl_le_p(&cipher_para->algo); + info->key_len = ldl_le_p(&cipher_para->keylen); + info->direction = ldl_le_p(&cipher_para->op); + DPRINTF("cipher_alg=%" PRIu32 ", info->direction=%" PRIu32 "\n", + info->cipher_alg, info->direction); + + if (info->key_len > vcrypto->conf.max_cipher_key_len) { + error_report("virtio-crypto length of cipher key is too big: %u", + info->key_len); + return -VIRTIO_CRYPTO_ERR; + } + /* Get cipher key */ + if (info->key_len > 0) { + size_t s; + DPRINTF("keylen=%" PRIu32 "\n", info->key_len); + + info->cipher_key = g_malloc(info->key_len); + s = iov_to_buf(*iov, num, 0, info->cipher_key, info->key_len); + if (unlikely(s != info->key_len)) { + virtio_error(vdev, "virtio-crypto cipher key incorrect"); + return -EFAULT; + } + iov_discard_front(iov, &num, info->key_len); + *out_num = num; + } + + return 0; +} + +static int64_t +virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, + struct virtio_crypto_sym_create_session_req *sess_req, + uint32_t queue_id, + uint32_t opcode, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + CryptoDevBackendSymSessionInfo info; + int64_t session_id; + int queue_index; + uint32_t op_type; + Error *local_err = NULL; + int ret; + + memset(&info, 0, sizeof(info)); + op_type = ldl_le_p(&sess_req->op_type); + info.op_type = op_type; + info.op_code = opcode; + + if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + ret = virtio_crypto_cipher_session_helper(vdev, &info, + &sess_req->u.cipher.para, + &iov, &out_num); + if (ret < 0) { + goto err; + } + } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + size_t s; + /* cipher part */ + ret = virtio_crypto_cipher_session_helper(vdev, &info, + &sess_req->u.chain.para.cipher_param, + &iov, &out_num); + if (ret < 0) { + goto err; + } + /* hash part */ + info.alg_chain_order = ldl_le_p( + &sess_req->u.chain.para.alg_chain_order); + info.add_len = ldl_le_p(&sess_req->u.chain.para.aad_len); + info.hash_mode = ldl_le_p(&sess_req->u.chain.para.hash_mode); + if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH) { + info.hash_alg = ldl_le_p(&sess_req->u.chain.para.u.mac_param.algo); + info.auth_key_len = ldl_le_p( + &sess_req->u.chain.para.u.mac_param.auth_key_len); + info.hash_result_len = ldl_le_p( + &sess_req->u.chain.para.u.mac_param.hash_result_len); + if (info.auth_key_len > vcrypto->conf.max_auth_key_len) { + error_report("virtio-crypto length of auth key is too big: %u", + info.auth_key_len); + ret = -VIRTIO_CRYPTO_ERR; + goto err; + } + /* get auth key */ + if (info.auth_key_len > 0) { + DPRINTF("auth_keylen=%" PRIu32 "\n", info.auth_key_len); + info.auth_key = g_malloc(info.auth_key_len); + s = iov_to_buf(iov, out_num, 0, info.auth_key, + info.auth_key_len); + if (unlikely(s != info.auth_key_len)) { + virtio_error(vdev, + "virtio-crypto authenticated key incorrect"); + ret = -EFAULT; + goto err; + } + iov_discard_front(&iov, &out_num, info.auth_key_len); + } + } else if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN) { + info.hash_alg = ldl_le_p( + &sess_req->u.chain.para.u.hash_param.algo); + info.hash_result_len = ldl_le_p( + &sess_req->u.chain.para.u.hash_param.hash_result_len); + } else { + /* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */ + error_report("unsupported hash mode"); + ret = -VIRTIO_CRYPTO_NOTSUPP; + goto err; + } + } else { + /* VIRTIO_CRYPTO_SYM_OP_NONE */ + error_report("unsupported cipher op_type: VIRTIO_CRYPTO_SYM_OP_NONE"); + ret = -VIRTIO_CRYPTO_NOTSUPP; + goto err; + } + + queue_index = virtio_crypto_vq2q(queue_id); + session_id = cryptodev_backend_sym_create_session( + vcrypto->cryptodev, + &info, queue_index, &local_err); + if (session_id >= 0) { + DPRINTF("create session_id=%" PRIu64 " successfully\n", + session_id); + + ret = session_id; + } else { + if (local_err) { + error_report_err(local_err); + } + ret = -VIRTIO_CRYPTO_ERR; + } + +err: + g_free(info.cipher_key); + g_free(info.auth_key); + return ret; +} + +static uint8_t +virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto, + struct virtio_crypto_destroy_session_req *close_sess_req, + uint32_t queue_id) +{ + int ret; + uint64_t session_id; + uint32_t status; + Error *local_err = NULL; + + session_id = ldq_le_p(&close_sess_req->session_id); + DPRINTF("close session, id=%" PRIu64 "\n", session_id); + + ret = cryptodev_backend_sym_close_session( + vcrypto->cryptodev, session_id, queue_id, &local_err); + if (ret == 0) { + status = VIRTIO_CRYPTO_OK; + } else { + if (local_err) { + error_report_err(local_err); + } else { + error_report("destroy session failed"); + } + status = VIRTIO_CRYPTO_ERR; + } + + return status; +} + +static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + struct virtio_crypto_op_ctrl_req ctrl; + VirtQueueElement *elem; + struct iovec *in_iov; + struct iovec *out_iov; + unsigned in_num; + unsigned out_num; + uint32_t queue_id; + uint32_t opcode; + struct virtio_crypto_session_input input; + int64_t session_id; + uint8_t status; + size_t s; + + for (;;) { + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; + } + if (elem->out_num < 1 || elem->in_num < 1) { + virtio_error(vdev, "virtio-crypto ctrl missing headers"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } + + out_num = elem->out_num; + out_iov = elem->out_sg; + in_num = elem->in_num; + in_iov = elem->in_sg; + if (unlikely(iov_to_buf(out_iov, out_num, 0, &ctrl, sizeof(ctrl)) + != sizeof(ctrl))) { + virtio_error(vdev, "virtio-crypto request ctrl_hdr too short"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } + iov_discard_front(&out_iov, &out_num, sizeof(ctrl)); + + opcode = ldl_le_p(&ctrl.header.opcode); + queue_id = ldl_le_p(&ctrl.header.queue_id); + + switch (opcode) { + case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + memset(&input, 0, sizeof(input)); + session_id = virtio_crypto_create_sym_session(vcrypto, + &ctrl.u.sym_create_session, + queue_id, opcode, + out_iov, out_num); + /* Serious errors, need to reset virtio crypto device */ + if (session_id == -EFAULT) { + virtqueue_detach_element(vq, elem, 0); + break; + } else if (session_id == -VIRTIO_CRYPTO_NOTSUPP) { + stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); + } else if (session_id == -VIRTIO_CRYPTO_ERR) { + stl_le_p(&input.status, VIRTIO_CRYPTO_ERR); + } else { + /* Set the session id */ + stq_le_p(&input.session_id, session_id); + stl_le_p(&input.status, VIRTIO_CRYPTO_OK); + } + + s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); + if (unlikely(s != sizeof(input))) { + virtio_error(vdev, "virtio-crypto input incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); + break; + case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION: + case VIRTIO_CRYPTO_HASH_DESTROY_SESSION: + case VIRTIO_CRYPTO_MAC_DESTROY_SESSION: + case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION: + status = virtio_crypto_handle_close_session(vcrypto, + &ctrl.u.destroy_session, queue_id); + /* The status only occupy one byte, we can directly use it */ + s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status)); + if (unlikely(s != sizeof(status))) { + virtio_error(vdev, "virtio-crypto status incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(status)); + virtio_notify(vdev, vq); + break; + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: + case VIRTIO_CRYPTO_MAC_CREATE_SESSION: + case VIRTIO_CRYPTO_AEAD_CREATE_SESSION: + default: + error_report("virtio-crypto unsupported ctrl opcode: %d", opcode); + memset(&input, 0, sizeof(input)); + stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); + s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); + if (unlikely(s != sizeof(input))) { + virtio_error(vdev, "virtio-crypto input incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); + + break; + } /* end switch case */ + + g_free(elem); + } /* end for loop */ +} + +static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq, + VirtIOCryptoReq *req) +{ + req->vcrypto = vcrypto; + req->vq = vq; + req->in = NULL; + req->in_iov = NULL; + req->in_num = 0; + req->in_len = 0; + req->flags = CRYPTODEV_BACKEND_ALG__MAX; + req->u.sym_op_info = NULL; +} + +static void virtio_crypto_free_request(VirtIOCryptoReq *req) +{ + if (req) { + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + g_free(req->u.sym_op_info); + } + g_free(req); + } +} + +static void +virtio_crypto_sym_input_data_helper(VirtIODevice *vdev, + VirtIOCryptoReq *req, + uint32_t status, + CryptoDevBackendSymOpInfo *sym_op_info) +{ + size_t s, len; + + if (status != VIRTIO_CRYPTO_OK) { + return; + } + + len = sym_op_info->dst_len; + /* Save the cipher result */ + s = iov_from_buf(req->in_iov, req->in_num, 0, sym_op_info->dst, len); + if (s != len) { + virtio_error(vdev, "virtio-crypto dest data incorrect"); + return; + } + + iov_discard_front(&req->in_iov, &req->in_num, len); + + if (sym_op_info->op_type == + VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + /* Save the digest result */ + s = iov_from_buf(req->in_iov, req->in_num, 0, + sym_op_info->digest_result, + sym_op_info->digest_result_len); + if (s != sym_op_info->digest_result_len) { + virtio_error(vdev, "virtio-crypto digest result incorrect"); + } + } +} + +static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) +{ + VirtIOCrypto *vcrypto = req->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + virtio_crypto_sym_input_data_helper(vdev, req, status, + req->u.sym_op_info); + } + stb_p(&req->in->status, status); + virtqueue_push(req->vq, &req->elem, req->in_len); + virtio_notify(vdev, req->vq); +} + +static VirtIOCryptoReq * +virtio_crypto_get_request(VirtIOCrypto *s, VirtQueue *vq) +{ + VirtIOCryptoReq *req = virtqueue_pop(vq, sizeof(VirtIOCryptoReq)); + + if (req) { + virtio_crypto_init_request(s, vq, req); + } + return req; +} + +static CryptoDevBackendSymOpInfo * +virtio_crypto_sym_op_helper(VirtIODevice *vdev, + struct virtio_crypto_cipher_para *cipher_para, + struct virtio_crypto_alg_chain_data_para *alg_chain_para, + struct iovec *iov, unsigned int out_num) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + CryptoDevBackendSymOpInfo *op_info; + uint32_t src_len = 0, dst_len = 0; + uint32_t iv_len = 0; + uint32_t aad_len = 0, hash_result_len = 0; + uint32_t hash_start_src_offset = 0, len_to_hash = 0; + uint32_t cipher_start_src_offset = 0, len_to_cipher = 0; + + size_t max_len, curr_size = 0; + size_t s; + + /* Plain cipher */ + if (cipher_para) { + iv_len = ldl_le_p(&cipher_para->iv_len); + src_len = ldl_le_p(&cipher_para->src_data_len); + dst_len = ldl_le_p(&cipher_para->dst_data_len); + } else if (alg_chain_para) { /* Algorithm chain */ + iv_len = ldl_le_p(&alg_chain_para->iv_len); + src_len = ldl_le_p(&alg_chain_para->src_data_len); + dst_len = ldl_le_p(&alg_chain_para->dst_data_len); + + aad_len = ldl_le_p(&alg_chain_para->aad_len); + hash_result_len = ldl_le_p(&alg_chain_para->hash_result_len); + hash_start_src_offset = ldl_le_p( + &alg_chain_para->hash_start_src_offset); + cipher_start_src_offset = ldl_le_p( + &alg_chain_para->cipher_start_src_offset); + len_to_cipher = ldl_le_p(&alg_chain_para->len_to_cipher); + len_to_hash = ldl_le_p(&alg_chain_para->len_to_hash); + } else { + return NULL; + } + + max_len = iv_len + aad_len + src_len + dst_len + hash_result_len; + if (unlikely(max_len > vcrypto->conf.max_size)) { + virtio_error(vdev, "virtio-crypto too big length"); + return NULL; + } + + op_info = g_malloc0(sizeof(CryptoDevBackendSymOpInfo) + max_len); + op_info->iv_len = iv_len; + op_info->src_len = src_len; + op_info->dst_len = dst_len; + op_info->aad_len = aad_len; + op_info->digest_result_len = hash_result_len; + op_info->hash_start_src_offset = hash_start_src_offset; + op_info->len_to_hash = len_to_hash; + op_info->cipher_start_src_offset = cipher_start_src_offset; + op_info->len_to_cipher = len_to_cipher; + /* Handle the initilization vector */ + if (op_info->iv_len > 0) { + DPRINTF("iv_len=%" PRIu32 "\n", op_info->iv_len); + op_info->iv = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->iv, op_info->iv_len); + if (unlikely(s != op_info->iv_len)) { + virtio_error(vdev, "virtio-crypto iv incorrect"); + goto err; + } + iov_discard_front(&iov, &out_num, op_info->iv_len); + curr_size += op_info->iv_len; + } + + /* Handle additional authentication data if exists */ + if (op_info->aad_len > 0) { + DPRINTF("aad_len=%" PRIu32 "\n", op_info->aad_len); + op_info->aad_data = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->aad_data, op_info->aad_len); + if (unlikely(s != op_info->aad_len)) { + virtio_error(vdev, "virtio-crypto additional auth data incorrect"); + goto err; + } + iov_discard_front(&iov, &out_num, op_info->aad_len); + + curr_size += op_info->aad_len; + } + + /* Handle the source data */ + if (op_info->src_len > 0) { + DPRINTF("src_len=%" PRIu32 "\n", op_info->src_len); + op_info->src = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->src, op_info->src_len); + if (unlikely(s != op_info->src_len)) { + virtio_error(vdev, "virtio-crypto source data incorrect"); + goto err; + } + iov_discard_front(&iov, &out_num, op_info->src_len); + + curr_size += op_info->src_len; + } + + /* Handle the destination data */ + op_info->dst = op_info->data + curr_size; + curr_size += op_info->dst_len; + + DPRINTF("dst_len=%" PRIu32 "\n", op_info->dst_len); + + /* Handle the hash digest result */ + if (hash_result_len > 0) { + DPRINTF("hash_result_len=%" PRIu32 "\n", hash_result_len); + op_info->digest_result = op_info->data + curr_size; + } + + return op_info; + +err: + g_free(op_info); + return NULL; +} + +static int +virtio_crypto_handle_sym_req(VirtIOCrypto *vcrypto, + struct virtio_crypto_sym_data_req *req, + CryptoDevBackendSymOpInfo **sym_op_info, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + uint32_t op_type; + CryptoDevBackendSymOpInfo *op_info; + + op_type = ldl_le_p(&req->op_type); + + if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + op_info = virtio_crypto_sym_op_helper(vdev, &req->u.cipher.para, + NULL, iov, out_num); + if (!op_info) { + return -EFAULT; + } + op_info->op_type = op_type; + } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + op_info = virtio_crypto_sym_op_helper(vdev, NULL, + &req->u.chain.para, + iov, out_num); + if (!op_info) { + return -EFAULT; + } + op_info->op_type = op_type; + } else { + /* VIRTIO_CRYPTO_SYM_OP_NONE */ + error_report("virtio-crypto unsupported cipher type"); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + *sym_op_info = op_info; + + return 0; +} + +static int +virtio_crypto_handle_request(VirtIOCryptoReq *request) +{ + VirtIOCrypto *vcrypto = request->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + VirtQueueElement *elem = &request->elem; + int queue_index = virtio_crypto_vq2q(virtio_get_queue_index(request->vq)); + struct virtio_crypto_op_data_req req; + int ret; + struct iovec *in_iov; + struct iovec *out_iov; + unsigned in_num; + unsigned out_num; + uint32_t opcode; + uint8_t status = VIRTIO_CRYPTO_ERR; + uint64_t session_id; + CryptoDevBackendSymOpInfo *sym_op_info = NULL; + Error *local_err = NULL; + + if (elem->out_num < 1 || elem->in_num < 1) { + virtio_error(vdev, "virtio-crypto dataq missing headers"); + return -1; + } + + out_num = elem->out_num; + out_iov = elem->out_sg; + in_num = elem->in_num; + in_iov = elem->in_sg; + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) + != sizeof(req))) { + virtio_error(vdev, "virtio-crypto request outhdr too short"); + return -1; + } + iov_discard_front(&out_iov, &out_num, sizeof(req)); + + if (in_iov[in_num - 1].iov_len < + sizeof(struct virtio_crypto_inhdr)) { + virtio_error(vdev, "virtio-crypto request inhdr too short"); + return -1; + } + /* We always touch the last byte, so just see how big in_iov is. */ + request->in_len = iov_size(in_iov, in_num); + request->in = (void *)in_iov[in_num - 1].iov_base + + in_iov[in_num - 1].iov_len + - sizeof(struct virtio_crypto_inhdr); + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_crypto_inhdr)); + + /* + * The length of operation result, including dest_data + * and digest_result if exists. + */ + request->in_num = in_num; + request->in_iov = in_iov; + + opcode = ldl_le_p(&req.header.opcode); + session_id = ldq_le_p(&req.header.session_id); + + switch (opcode) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + case VIRTIO_CRYPTO_CIPHER_DECRYPT: + ret = virtio_crypto_handle_sym_req(vcrypto, + &req.u.sym_req, + &sym_op_info, + out_iov, out_num); + /* Serious errors, need to reset virtio crypto device */ + if (ret == -EFAULT) { + return -1; + } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) { + virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); + virtio_crypto_free_request(request); + } else { + sym_op_info->session_id = session_id; + + /* Set request's parameter */ + request->flags = CRYPTODEV_BACKEND_ALG_SYM; + request->u.sym_op_info = sym_op_info; + ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev, + request, queue_index, &local_err); + if (ret < 0) { + status = -ret; + if (local_err) { + error_report_err(local_err); + } + } else { /* ret == VIRTIO_CRYPTO_OK */ + status = ret; + } + virtio_crypto_req_complete(request, status); + virtio_crypto_free_request(request); + } + break; + case VIRTIO_CRYPTO_HASH: + case VIRTIO_CRYPTO_MAC: + case VIRTIO_CRYPTO_AEAD_ENCRYPT: + case VIRTIO_CRYPTO_AEAD_DECRYPT: + default: + error_report("virtio-crypto unsupported dataq opcode: %u", + opcode); + virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); + virtio_crypto_free_request(request); + } + + return 0; +} + +static void virtio_crypto_handle_dataq(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + VirtIOCryptoReq *req; + + while ((req = virtio_crypto_get_request(vcrypto, vq))) { + if (virtio_crypto_handle_request(req) < 0) { + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_crypto_free_request(req); + break; + } + } +} + +static void virtio_crypto_dataq_bh(void *opaque) +{ + VirtIOCryptoQueue *q = opaque; + VirtIOCrypto *vcrypto = q->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + + /* This happens when device was stopped but BH wasn't. */ + if (!vdev->vm_running) { + return; + } + + /* Just in case the driver is not ready on more */ + if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { + return; + } + + virtio_crypto_handle_dataq(vdev, q->dataq); + virtio_queue_set_notification(q->dataq, 1); +} + +static void +virtio_crypto_handle_dataq_bh(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + VirtIOCryptoQueue *q = + &vcrypto->vqs[virtio_crypto_vq2q(virtio_get_queue_index(vq))]; + + /* This happens when device was stopped but VCPU wasn't. */ + if (!vdev->vm_running) { + return; + } + virtio_queue_set_notification(vq, 0); + qemu_bh_schedule(q->dataq_bh); +} + +static uint64_t virtio_crypto_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + return features; +} + +static void virtio_crypto_reset(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + /* multiqueue is disabled by default */ + vcrypto->curr_queues = 1; + if (!vcrypto->cryptodev->ready) { + vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY; + } else { + vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY; + } +} + +static void virtio_crypto_init_config(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + + vcrypto->conf.crypto_services = + vcrypto->conf.cryptodev->conf.crypto_services; + vcrypto->conf.cipher_algo_l = + vcrypto->conf.cryptodev->conf.cipher_algo_l; + vcrypto->conf.cipher_algo_h = + vcrypto->conf.cryptodev->conf.cipher_algo_h; + vcrypto->conf.hash_algo = vcrypto->conf.cryptodev->conf.hash_algo; + vcrypto->conf.mac_algo_l = vcrypto->conf.cryptodev->conf.mac_algo_l; + vcrypto->conf.mac_algo_h = vcrypto->conf.cryptodev->conf.mac_algo_h; + vcrypto->conf.aead_algo = vcrypto->conf.cryptodev->conf.aead_algo; + vcrypto->conf.max_cipher_key_len = + vcrypto->conf.cryptodev->conf.max_cipher_key_len; + vcrypto->conf.max_auth_key_len = + vcrypto->conf.cryptodev->conf.max_auth_key_len; + vcrypto->conf.max_size = vcrypto->conf.cryptodev->conf.max_size; +} + +static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev); + int i; + + vcrypto->cryptodev = vcrypto->conf.cryptodev; + if (vcrypto->cryptodev == NULL) { + error_setg(errp, "'cryptodev' parameter expects a valid object"); + return; + } + + vcrypto->max_queues = MAX(vcrypto->cryptodev->conf.peers.queues, 1); + if (vcrypto->max_queues + 1 > VIRTIO_QUEUE_MAX) { + error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + "must be a postive integer less than %d.", + vcrypto->max_queues, VIRTIO_QUEUE_MAX); + return; + } + + virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size); + vcrypto->curr_queues = 1; + vcrypto->vqs = g_malloc0(sizeof(VirtIOCryptoQueue) * vcrypto->max_queues); + for (i = 0; i < vcrypto->max_queues; i++) { + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = + qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); + vcrypto->vqs[i].vcrypto = vcrypto; + } + + vcrypto->ctrl_vq = virtio_add_queue(vdev, 64, virtio_crypto_handle_ctrl); + if (!vcrypto->cryptodev->ready) { + vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY; + } else { + vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY; + } + + virtio_crypto_init_config(vdev); +} + +static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev); + VirtIOCryptoQueue *q; + int i, max_queues; + + max_queues = vcrypto->multiqueue ? vcrypto->max_queues : 1; + for (i = 0; i < max_queues; i++) { + virtio_del_queue(vdev, i); + q = &vcrypto->vqs[i]; + qemu_bh_delete(q->dataq_bh); + } + + g_free(vcrypto->vqs); + + virtio_cleanup(vdev); +} + +static const VMStateDescription vmstate_virtio_crypto = { + .name = "virtio-crypto", + .minimum_version_id = VIRTIO_CRYPTO_VM_VERSION, + .version_id = VIRTIO_CRYPTO_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property virtio_crypto_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_crypto_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOCrypto *c = VIRTIO_CRYPTO(vdev); + struct virtio_crypto_config crypto_cfg; + + /* + * Virtio-crypto device conforms to VIRTIO 1.0 which is always LE, + * so we can use LE accessors directly. + */ + stl_le_p(&crypto_cfg.status, c->status); + stl_le_p(&crypto_cfg.max_dataqueues, c->max_queues); + stl_le_p(&crypto_cfg.crypto_services, c->conf.crypto_services); + stl_le_p(&crypto_cfg.cipher_algo_l, c->conf.cipher_algo_l); + stl_le_p(&crypto_cfg.cipher_algo_h, c->conf.cipher_algo_h); + stl_le_p(&crypto_cfg.hash_algo, c->conf.hash_algo); + stl_le_p(&crypto_cfg.mac_algo_l, c->conf.mac_algo_l); + stl_le_p(&crypto_cfg.mac_algo_h, c->conf.mac_algo_h); + stl_le_p(&crypto_cfg.aead_algo, c->conf.aead_algo); + stl_le_p(&crypto_cfg.max_cipher_key_len, c->conf.max_cipher_key_len); + stl_le_p(&crypto_cfg.max_auth_key_len, c->conf.max_auth_key_len); + stq_le_p(&crypto_cfg.max_size, c->conf.max_size); + + memcpy(config, &crypto_cfg, c->config_size); +} + +static void virtio_crypto_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = virtio_crypto_properties; + dc->vmsd = &vmstate_virtio_crypto; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_crypto_device_realize; + vdc->unrealize = virtio_crypto_device_unrealize; + vdc->get_config = virtio_crypto_get_config; + vdc->get_features = virtio_crypto_get_features; + vdc->reset = virtio_crypto_reset; +} + +static void virtio_crypto_instance_init(Object *obj) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(obj); + + /* + * The default config_size is sizeof(struct virtio_crypto_config). + * Can be overriden with virtio_crypto_set_config_size. + */ + vcrypto->config_size = sizeof(struct virtio_crypto_config); + + object_property_add_link(obj, "cryptodev", + TYPE_CRYPTODEV_BACKEND, + (Object **)&vcrypto->conf.cryptodev, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); +} + +static const TypeInfo virtio_crypto_info = { + .name = TYPE_VIRTIO_CRYPTO, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOCrypto), + .instance_init = virtio_crypto_instance_init, + .class_init = virtio_crypto_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_crypto_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 13798b3cb8..a30270f902 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -89,38 +89,12 @@ typedef struct { uint32_t guest_page_shift; /* virtio-bus */ VirtioBusState bus; - bool ioeventfd_disabled; - bool ioeventfd_started; bool format_transport_address; } VirtIOMMIOProxy; -static bool virtio_mmio_ioeventfd_started(DeviceState *d) +static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) { - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - return proxy->ioeventfd_started; -} - -static void virtio_mmio_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - proxy->ioeventfd_started = started; -} - -static bool virtio_mmio_ioeventfd_disabled(DeviceState *d) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - return !kvm_eventfds_enabled() || proxy->ioeventfd_disabled; -} - -static void virtio_mmio_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - proxy->ioeventfd_disabled = disabled; + return kvm_eventfds_enabled(); } static int virtio_mmio_ioeventfd_assign(DeviceState *d, @@ -557,10 +531,7 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->save_config = virtio_mmio_save_config; k->load_config = virtio_mmio_load_config; k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; - k->ioeventfd_started = virtio_mmio_ioeventfd_started; - k->ioeventfd_set_started = virtio_mmio_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_mmio_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_mmio_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 06831de5ff..62001b46d7 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -262,34 +262,11 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } -static bool virtio_pci_ioeventfd_started(DeviceState *d) +static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return proxy->ioeventfd_started; -} - -static void virtio_pci_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - proxy->ioeventfd_started = started; -} - -static bool virtio_pci_ioeventfd_disabled(DeviceState *d) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->ioeventfd_disabled || - !(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD); -} - -static void virtio_pci_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - proxy->ioeventfd_disabled = disabled; + return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0; } #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 @@ -1719,10 +1696,6 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } - - if (!kvm_has_many_ioeventfds()) { - proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; - } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -1751,6 +1724,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) bool pcie_port = pci_bus_is_express(pci_dev->bus) && !pci_bus_is_root(pci_dev->bus); + if (!kvm_has_many_ioeventfds()) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + /* * virtio pci bar layout used by default. * subclasses can re-arrange things if needed. @@ -2539,10 +2516,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->device_plugged = virtio_pci_device_plugged; k->device_unplugged = virtio_pci_device_unplugged; k->query_nvectors = virtio_pci_query_nvectors; - k->ioeventfd_started = virtio_pci_ioeventfd_started; - k->ioeventfd_set_started = virtio_pci_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_pci_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_pci_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled; k->ioeventfd_assign = virtio_pci_ioeventfd_assign; } diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index b4edea6987..b2a996fa83 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -25,6 +25,8 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-input.h" #include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-crypto.h" + #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" #endif @@ -48,6 +50,7 @@ typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; typedef struct VirtIOInputHostPCI VirtIOInputHostPCI; typedef struct VirtIOGPUPCI VirtIOGPUPCI; typedef struct VHostVSockPCI VHostVSockPCI; +typedef struct VirtIOCryptoPCI VirtIOCryptoPCI; /* virtio-pci-bus */ @@ -158,8 +161,6 @@ struct VirtIOPCIProxy { uint32_t guest_features[2]; VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; - bool ioeventfd_disabled; - bool ioeventfd_started; VirtIOIRQFD *vector_irqfd; int nvqs_with_notifiers; VirtioBusState bus; @@ -352,6 +353,18 @@ struct VHostVSockPCI { }; #endif +/* + * virtio-crypto-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_CRYPTO_PCI "virtio-crypto-pci" +#define VIRTIO_CRYPTO_PCI(obj) \ + OBJECT_CHECK(VirtIOCryptoPCI, (obj), TYPE_VIRTIO_CRYPTO_PCI) + +struct VirtIOCryptoPCI { + VirtIOPCIProxy parent_obj; + VirtIOCrypto vdev; +}; + /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d48d1a98a7..bcbcfe063c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -97,7 +97,6 @@ struct VirtQueue uint16_t vector; VirtIOHandleOutput handle_output; VirtIOHandleOutput handle_aio_output; - bool use_aio; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -1287,9 +1286,8 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) } } -static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output, - bool use_aio) +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) { int i; @@ -1306,28 +1304,10 @@ static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; vdev->vq[i].handle_aio_output = NULL; - vdev->vq[i].use_aio = use_aio; return &vdev->vq[i]; } -/* Add a virt queue and mark AIO. - * An AIO queue will use the AioContext based event interface instead of the - * default IOHandler and EventNotifier interface. - */ -VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output) -{ - return virtio_add_queue_internal(vdev, queue_size, handle_output, true); -} - -/* Add a normal virt queue (on the contrary to the AIO version above. */ -VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output) -{ - return virtio_add_queue_internal(vdev, queue_size, handle_output, false); -} - void virtio_del_queue(VirtIODevice *vdev, int n) { if (n < 0 || n >= VIRTIO_QUEUE_MAX) { @@ -1635,6 +1615,10 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vdc->save(vdev, f); } + if (vdc->vmsd) { + vmstate_save_state(f, vdc->vmsd, vdev, NULL); + } + /* Subsections */ vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } @@ -1781,6 +1765,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } } + if (vdc->vmsd) { + ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); + if (ret) { + return ret; + } + } + /* Subsections */ ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); if (ret) { @@ -2051,7 +2042,7 @@ void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, } } -static void virtio_queue_host_notifier_read(EventNotifier *n) +void virtio_queue_host_notifier_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, host_notifier); if (event_notifier_test_and_clear(n)) { @@ -2059,32 +2050,6 @@ static void virtio_queue_host_notifier_read(EventNotifier *n) } } -void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, - bool set_handler) -{ - AioContext *ctx = qemu_get_aio_context(); - if (assign && set_handler) { - if (vq->use_aio) { - aio_set_event_notifier(ctx, &vq->host_notifier, true, - virtio_queue_host_notifier_read); - } else { - event_notifier_set_handler(&vq->host_notifier, true, - virtio_queue_host_notifier_read); - } - } else { - if (vq->use_aio) { - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); - } else { - event_notifier_set_handler(&vq->host_notifier, true, NULL); - } - } - if (!assign) { - /* Test and clear notifier before after disabling event, - * in case poll callback didn't have time to run. */ - virtio_queue_host_notifier_read(&vq->host_notifier); - } -} - EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) { return &vq->host_notifier; @@ -2118,6 +2083,9 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); Error *err = NULL; + /* Devices should either use vmsd or the load/save methods */ + assert(!vdc->vmsd || !vdc->load); + if (vdc->realize != NULL) { vdc->realize(dev, &err); if (err != NULL) { @@ -2158,15 +2126,102 @@ static Property virtio_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) +{ + VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + int n, r, err; + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + VirtQueue *vq = &vdev->vq[n]; + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + r = virtio_bus_set_host_notifier(qbus, n, true); + if (r < 0) { + err = r; + goto assign_error; + } + event_notifier_set_handler(&vq->host_notifier, true, + virtio_queue_host_notifier_read); + } + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + /* Kick right away to begin processing requests already in vring */ + VirtQueue *vq = &vdev->vq[n]; + if (!vq->vring.num) { + continue; + } + event_notifier_set(&vq->host_notifier); + } + return 0; + +assign_error: + while (--n >= 0) { + VirtQueue *vq = &vdev->vq[n]; + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + + event_notifier_set_handler(&vq->host_notifier, true, NULL); + r = virtio_bus_set_host_notifier(qbus, n, false); + assert(r >= 0); + } + return err; +} + +int virtio_device_start_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_start_ioeventfd(vbus); +} + +static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) +{ + VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + int n, r; + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + VirtQueue *vq = &vdev->vq[n]; + + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + event_notifier_set_handler(&vq->host_notifier, true, NULL); + r = virtio_bus_set_host_notifier(qbus, n, false); + assert(r >= 0); + } +} + +void virtio_device_stop_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + virtio_bus_stop_ioeventfd(vbus); +} + static void virtio_device_class_init(ObjectClass *klass, void *data) { /* Set the default value here. */ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = virtio_device_realize; dc->unrealize = virtio_device_unrealize; dc->bus_type = TYPE_VIRTIO_BUS; dc->props = virtio_properties; + vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl; + vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; +} + +bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_ioeventfd_enabled(vbus); } static const TypeInfo virtio_device_info = { diff --git a/include/block/nbd.h b/include/block/nbd.h index 80610ff31b..3e373f0498 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device @@ -25,52 +26,89 @@ #include "io/channel-socket.h" #include "crypto/tlscreds.h" -/* Note: these are _NOT_ the same as the network representation of an NBD +/* Handshake phase structs - this struct is passed on the wire */ + +struct nbd_option { + uint64_t magic; /* NBD_OPTS_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_option nbd_option; + +struct nbd_opt_reply { + uint64_t magic; /* NBD_REP_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t type; /* NBD_REP_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_opt_reply nbd_opt_reply; + +/* Transmission phase structs + * + * Note: these are _NOT_ the same as the network representation of an NBD * request and reply! */ -struct nbd_request { +struct NBDRequest { uint64_t handle; uint64_t from; uint32_t len; - uint32_t type; + uint16_t flags; /* NBD_CMD_FLAG_* */ + uint16_t type; /* NBD_CMD_* */ }; +typedef struct NBDRequest NBDRequest; -struct nbd_reply { +struct NBDReply { uint64_t handle; uint32_t error; }; +typedef struct NBDReply NBDReply; +/* Transmission (export) flags: sent from server to client during handshake, + but describe what will happen during transmission */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ #define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ -/* New-style global flags. */ -#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ -/* New-style client flags. */ -#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style client flags, sent from client to server to control what happens + during handshake phase. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ /* Reply types. */ +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value)) + #define NBD_REP_ACK (1) /* Data sending finished. */ #define NBD_REP_SERVER (2) /* Export description. */ -#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ -#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */ -#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */ -#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */ +#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */ +#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */ +#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */ +#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */ +#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */ +#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */ -#define NBD_CMD_MASK_COMMAND 0x0000ffff -#define NBD_CMD_FLAG_FUA (1 << 16) +/* Request flags, sent from client to server during transmission phase */ +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ +/* Supported request types */ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, - NBD_CMD_TRIM = 4 + NBD_CMD_TRIM = 4, + /* 5 reserved for failed experiment NBD_CMD_CACHE */ + NBD_CMD_WRITE_ZEROES = 6, }; #define NBD_DEFAULT_PORT 10809 @@ -95,8 +133,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QIOChannel **outioc, off_t *size, Error **errp); int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size); -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request); -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply); +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request); +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply); int nbd_client(int fd); int nbd_disconnect(int fd); @@ -115,6 +153,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp); NBDExport *nbd_export_find(const char *name); void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_set_description(NBDExport *exp, const char *description); void nbd_export_close_all(void); void nbd_client_new(NBDExport *exp, diff --git a/include/glib-compat.h b/include/glib-compat.h index 3f8370b3e4..acf254d2a0 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -315,4 +315,17 @@ static inline void g_source_set_name_by_id(guint tag, const char *name) } #endif +#if !GLIB_CHECK_VERSION(2, 36, 0) +/* Always fail. This will not include error_report output in the test log, + * sending it instead to stderr. + */ +#define g_test_initialized() (0) +#endif +#if !GLIB_CHECK_VERSION(2, 38, 0) +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +#error schizophrenic detection of glib subprocess testing +#endif +#define g_test_subprocess() (0) +#endif + #endif diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index da4ef7fbd3..901a4ae876 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -10,6 +10,7 @@ typedef enum { ACPI_PCI_HOTPLUG_STATUS = 2, ACPI_CPU_HOTPLUG_STATUS = 4, ACPI_MEMORY_HOTPLUG_STATUS = 8, + ACPI_NVDIMM_HOTPLUG_STATUS = 16, } AcpiEventStatusBits; #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index c0db869f85..10ca5b6504 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -47,6 +47,7 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler, * @parent: Opaque parent interface. * @pre_plug: pre plug callback called at start of device.realize(true) * @plug: plug callback called at end of device.realize(true). + * @post_pug: post plug callback called after device is successfully plugged. * @unplug_request: unplug request callback. * Used as a means to initiate device unplug for devices that * require asynchronous unplug handling. @@ -61,6 +62,7 @@ typedef struct HotplugHandlerClass { /* <public> */ hotplug_fn pre_plug; hotplug_fn plug; + hotplug_fn post_plug; hotplug_fn unplug_request; hotplug_fn unplug; } HotplugHandlerClass; @@ -83,6 +85,14 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp); +/** + * hotplug_handler_post_plug: + * + * Call #HotplugHandlerClass.post_plug callback of @plug_handler. + */ +void hotplug_handler_post_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp); /** * hotplug_handler_unplug_request: diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 1cfe9e01c4..33cd421ace 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -98,12 +98,35 @@ typedef struct NVDIMMClass NVDIMMClass; #define NVDIMM_ACPI_IO_BASE 0x0a18 #define NVDIMM_ACPI_IO_LEN 4 +/* + * The buffer, @fit, saves the FIT info for all the presented NVDIMM + * devices which is updated after the NVDIMM device is plugged or + * unplugged. + * + * Rules to use the buffer: + * 1) the user should hold the @lock to access the buffer. + * 2) mark @dirty whenever the buffer is updated. + * + * These rules preserve NVDIMM ACPI _FIT method to read incomplete + * or obsolete fit info if fit update happens during multiple RFIT + * calls. + */ +struct NvdimmFitBuffer { + QemuMutex lock; + GArray *fit; + bool dirty; +}; +typedef struct NvdimmFitBuffer NvdimmFitBuffer; + struct AcpiNVDIMMState { /* detect if NVDIMM support is enabled. */ bool is_enabled; /* the data of the fw_cfg file NVDIMM_DSM_MEM_FILE. */ GArray *dsm_mem; + + NvdimmFitBuffer fit_buf; + /* the IO region used by OSPM to transfer control to QEMU. */ MemoryRegion io_mr; }; @@ -112,5 +135,7 @@ typedef struct AcpiNVDIMMState AcpiNVDIMMState; void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, FWCfgState *fw_cfg, Object *owner); void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, GArray *dsm_dma_arrea); + BIOSLinker *linker, AcpiNVDIMMState *state, + uint32_t ram_slots); +void nvdimm_acpi_hotplug(AcpiNVDIMMState *state); #endif diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 2e4b67ea50..fdf7fdab81 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -70,21 +70,11 @@ typedef struct VirtioBusClass { void (*device_unplugged)(DeviceState *d); int (*query_nvectors)(DeviceState *d); /* - * ioeventfd handling: if the transport implements ioeventfd_started, - * it must implement the other ioeventfd callbacks as well + * ioeventfd handling: if the transport implements ioeventfd_assign, + * it must implement ioeventfd_enabled as well. */ - /* Returns true if the ioeventfd has been started for the device. */ - bool (*ioeventfd_started)(DeviceState *d); - /* - * Sets the 'ioeventfd started' state after the ioeventfd has been - * started/stopped for the device. err signifies whether an error - * had occurred. - */ - void (*ioeventfd_set_started)(DeviceState *d, bool started, bool err); - /* Returns true if the ioeventfd has been disabled for the device. */ - bool (*ioeventfd_disabled)(DeviceState *d); - /* Sets the 'ioeventfd disabled' state for the device. */ - void (*ioeventfd_set_disabled)(DeviceState *d, bool disabled); + /* Returns true if the ioeventfd is enabled for the device. */ + bool (*ioeventfd_enabled)(DeviceState *d); /* * Assigns/deassigns the ioeventfd backing for the transport on * the device for queue number n. Returns an error value on @@ -102,6 +92,11 @@ typedef struct VirtioBusClass { struct VirtioBusState { BusState parent_obj; + + /* + * Set if ioeventfd has been started. + */ + bool ioeventfd_started; }; void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp); @@ -130,8 +125,10 @@ static inline VirtIODevice *virtio_bus_get_device(VirtioBusState *bus) return (VirtIODevice *)qdev; } +/* Return whether the proxy allows ioeventfd. */ +bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus); /* Start the ioeventfd. */ -void virtio_bus_start_ioeventfd(VirtioBusState *bus); +int virtio_bus_start_ioeventfd(VirtioBusState *bus); /* Stop the ioeventfd. */ void virtio_bus_stop_ioeventfd(VirtioBusState *bus); /* Switch from/to the generic ioeventfd handler */ diff --git a/include/hw/virtio/virtio-crypto.h b/include/hw/virtio/virtio-crypto.h new file mode 100644 index 0000000000..a00a0bfaba --- /dev/null +++ b/include/hw/virtio/virtio-crypto.h @@ -0,0 +1,101 @@ +/* + * Virtio crypto Support + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VIRTIO_CRYPTO_H +#define _QEMU_VIRTIO_CRYPTO_H + +#include "standard-headers/linux/virtio_crypto.h" +#include "hw/virtio/virtio.h" +#include "sysemu/iothread.h" +#include "sysemu/cryptodev.h" + + +#define DEBUG_VIRTIO_CRYPTO 0 + +#define DPRINTF(fmt, ...) \ +do { \ + if (DEBUG_VIRTIO_CRYPTO) { \ + fprintf(stderr, "virtio_crypto: " fmt, ##__VA_ARGS__); \ + } \ +} while (0) + + +#define TYPE_VIRTIO_CRYPTO "virtio-crypto-device" +#define VIRTIO_CRYPTO(obj) \ + OBJECT_CHECK(VirtIOCrypto, (obj), TYPE_VIRTIO_CRYPTO) +#define VIRTIO_CRYPTO_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_CRYPTO) + + +typedef struct VirtIOCryptoConf { + CryptoDevBackend *cryptodev; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +} VirtIOCryptoConf; + +struct VirtIOCrypto; + +typedef struct VirtIOCryptoReq { + VirtQueueElement elem; + /* flags of operation, such as type of algorithm */ + uint32_t flags; + struct virtio_crypto_inhdr *in; + struct iovec *in_iov; /* Head address of dest iovec */ + unsigned int in_num; /* Number of dest iovec */ + size_t in_len; + VirtQueue *vq; + struct VirtIOCrypto *vcrypto; + union { + CryptoDevBackendSymOpInfo *sym_op_info; + } u; +} VirtIOCryptoReq; + +typedef struct VirtIOCryptoQueue { + VirtQueue *dataq; + QEMUBH *dataq_bh; + struct VirtIOCrypto *vcrypto; +} VirtIOCryptoQueue; + +typedef struct VirtIOCrypto { + VirtIODevice parent_obj; + + VirtQueue *ctrl_vq; + VirtIOCryptoQueue *vqs; + VirtIOCryptoConf conf; + CryptoDevBackend *cryptodev; + + uint32_t max_queues; + uint32_t status; + + int multiqueue; + uint32_t curr_queues; + size_t config_size; +} VirtIOCrypto; + +#endif /* _QEMU_VIRTIO_CRYPTO_H */ diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index a1e0cfb449..9fbc7d7475 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -134,9 +134,9 @@ void virtio_scsi_free_req(VirtIOSCSIReq *req); void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, uint32_t event, uint32_t reason); -void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread); -void virtio_scsi_dataplane_start(VirtIOSCSI *s); -void virtio_scsi_dataplane_stop(VirtIOSCSI *s); +void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); +int virtio_scsi_dataplane_start(VirtIODevice *s); +void virtio_scsi_dataplane_stop(VirtIODevice *s); void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req); #endif /* QEMU_VIRTIO_SCSI_H */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b913aac455..ac65d6a594 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -125,8 +125,14 @@ typedef struct VirtioDeviceClass { * must mask in frontend instead. */ void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + int (*start_ioeventfd)(VirtIODevice *vdev); + void (*stop_ioeventfd)(VirtIODevice *vdev); + /* Saving and loading of a device; trying to deprecate save/load + * use vmsd for new devices. + */ void (*save)(VirtIODevice *vdev, QEMUFile *f); int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); + const VMStateDescription *vmsd; } VirtioDeviceClass; void virtio_instance_init_common(Object *proxy_obj, void *data, @@ -146,9 +152,6 @@ typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, VirtIOHandleOutput handle_output); -VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output); - void virtio_del_queue(VirtIODevice *vdev, int n); void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num); @@ -265,9 +268,11 @@ uint16_t virtio_get_queue_index(VirtQueue *vq); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, bool with_irqfd); +int virtio_device_start_ioeventfd(VirtIODevice *vdev); +void virtio_device_stop_ioeventfd(VirtIODevice *vdev); +bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); -void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, - bool set_handler); +void virtio_queue_host_notifier_read(EventNotifier *n); void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, void (*fn)(VirtIODevice *, VirtQueue *)); diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h index 499ec8b12a..3001865896 100644 --- a/include/qemu/error-report.h +++ b/include/qemu/error-report.h @@ -32,6 +32,7 @@ void loc_set_file(const char *fname, int lno); void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +void error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2); void error_set_progname(const char *argv0); void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 0e3c330e6b..689f253ea7 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -128,6 +128,9 @@ extern int daemon(int, int); #if !defined(EMEDIUMTYPE) #define EMEDIUMTYPE 4098 #endif +#if !defined(ESHUTDOWN) +#define ESHUTDOWN 4099 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif diff --git a/include/standard-headers/linux/virtio_crypto.h b/include/standard-headers/linux/virtio_crypto.h new file mode 100644 index 0000000000..82275a84d8 --- /dev/null +++ b/include/standard-headers/linux/virtio_crypto.h @@ -0,0 +1,429 @@ +#ifndef _LINUX_VIRTIO_CRYPTO_H +#define _LINUX_VIRTIO_CRYPTO_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_types.h" + + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + __virtio32 opcode; + __virtio32 algo; + __virtio32 flag; + /* data virtqueue id */ + __virtio32 queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + __virtio32 algo; + /* length of key */ + __virtio32 keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 +#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + __virtio32 op; + __virtio32 padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + __virtio64 session_id; + __virtio32 status; + __virtio32 padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + __virtio32 algo; + /* hash result length */ + __virtio32 hash_result_len; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + __virtio32 algo; + /* hash result length */ + __virtio32 hash_result_len; + /* length of authenticated key */ + __virtio32 auth_key_len; + __virtio32 padding; +}; + +struct virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + __virtio32 algo; + /* length of key */ + __virtio32 key_len; + /* digest result length */ + __virtio32 digest_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + __virtio32 aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + __virtio32 op; + __virtio32 padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + __virtio32 alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + __virtio32 hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + __virtio32 aad_len; + __virtio32 padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* Chain any cipher with any hash or mac operation. The order + depends on the value of alg_chain_order param */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + __virtio32 op_type; + __virtio32 padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + __virtio64 session_id; +}; + +/* The request of the control viritqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req sym_create_session; + struct virtio_crypto_hash_create_session_req hash_create_session; + struct virtio_crypto_mac_create_session_req mac_create_session; + struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_destroy_session_req destroy_session; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + __virtio32 opcode; + /* algo should be service-specific algorithms */ + __virtio32 algo; + /* session_id should be service-specific algorithms */ + __virtio64 session_id; + /* control flag to control the request */ + __virtio32 flag; + __virtio32 padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * - For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * - For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * - For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + __virtio32 iv_len; + /* length of source data */ + __virtio32 src_data_len; + /* length of dst data */ + __virtio32 dst_data_len; + __virtio32 padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + __virtio32 src_data_len; + /* hash result length */ + __virtio32 hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * - For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * - For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. + */ + __virtio32 iv_len; + /* length of additional auth data */ + __virtio32 aad_len; + /* length of source data */ + __virtio32 src_data_len; + /* length of dst data */ + __virtio32 dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; +}; + +struct virtio_crypto_alg_chain_data_para { + __virtio32 iv_len; + /* Length of source data */ + __virtio32 src_data_len; + /* Length of destination data */ + __virtio32 dst_data_len; + /* Starting point for cipher processing in source data */ + __virtio32 cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + __virtio32 len_to_cipher; + /* Starting point for hash processing in source data */ + __virtio32 hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + __virtio32 len_to_hash; + /* Length of the additional auth data */ + __virtio32 aad_len; + /* Length of the hash result */ + __virtio32 hash_result_len; + __virtio32 reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + __virtio32 op_type; + __virtio32 padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; +}; + +/* The request of the data viritqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invaild session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) +#define VIRTIO_CRYPTO_S_STARTED (1 << 1) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_* above */ + __virtio32 status; + + /* + * Maximum number of data queue legal values are between 1 and 0x8000 + */ + __virtio32 max_dataqueues; + + /* Specifies the services mask which the devcie support, + see VIRTIO_CRYPTO_SERVICE_* above */ + __virtio32 crypto_services; + + /* Detailed algorithms mask */ + __virtio32 cipher_algo_l; + __virtio32 cipher_algo_h; + __virtio32 hash_algo; + __virtio32 mac_algo_l; + __virtio32 mac_algo_h; + __virtio32 aead_algo; + + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + + __virtio32 reserve; + + /* The maximum size of per request's content */ + __virtio64 max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; + +#endif /* _LINUX_VIRTIO_CRYPTO_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index 3228d58223..fe74e422d4 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -42,5 +42,5 @@ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ - +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h new file mode 100644 index 0000000000..84526c0d35 --- /dev/null +++ b/include/sysemu/cryptodev.h @@ -0,0 +1,298 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei <arei.gonglei@huawei.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef CRYPTODEV_H +#define CRYPTODEV_H + +#include "qom/object.h" +#include "qemu-common.h" + +/** + * CryptoDevBackend: + * + * The CryptoDevBackend object is an interface + * for different cryptodev backends, which provides crypto + * operation wrapper. + * + */ + +#define TYPE_CRYPTODEV_BACKEND "cryptodev-backend" + +#define CRYPTODEV_BACKEND(obj) \ + OBJECT_CHECK(CryptoDevBackend, \ + (obj), TYPE_CRYPTODEV_BACKEND) +#define CRYPTODEV_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(CryptoDevBackendClass, \ + (obj), TYPE_CRYPTODEV_BACKEND) +#define CRYPTODEV_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(CryptoDevBackendClass, \ + (klass), TYPE_CRYPTODEV_BACKEND) + + +#define MAX_CRYPTO_QUEUE_NUM 64 + +typedef struct CryptoDevBackendConf CryptoDevBackendConf; +typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; +typedef struct CryptoDevBackendClient + CryptoDevBackendClient; +typedef struct CryptoDevBackend CryptoDevBackend; + +enum CryptoDevBackendAlgType { + CRYPTODEV_BACKEND_ALG_SYM, + CRYPTODEV_BACKEND_ALG__MAX, +}; + +/** + * CryptoDevBackendSymSessionInfo: + * + * @op_code: operation code (refer to virtio_crypto.h) + * @cipher_alg: algorithm type of CIPHER + * @key_len: byte length of cipher key + * @hash_alg: algorithm type of HASH/MAC + * @hash_result_len: byte length of HASH operation result + * @auth_key_len: byte length of authenticated key + * @add_len: byte length of additional authenticated data + * @op_type: operation type (refer to virtio_crypto.h) + * @direction: encryption or direction for CIPHER + * @hash_mode: HASH mode for HASH operation (refer to virtio_crypto.h) + * @alg_chain_order: order of algorithm chaining (CIPHER then HASH, + * or HASH then CIPHER) + * @cipher_key: point to a key of CIPHER + * @auth_key: point to an authenticated key of MAC + * + */ +typedef struct CryptoDevBackendSymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t op_code; + uint32_t cipher_alg; + uint32_t key_len; + uint32_t hash_alg; + uint32_t hash_result_len; + uint32_t auth_key_len; + uint32_t add_len; + uint8_t op_type; + uint8_t direction; + uint8_t hash_mode; + uint8_t alg_chain_order; + uint8_t *cipher_key; + uint8_t *auth_key; +} CryptoDevBackendSymSessionInfo; + +/** + * CryptoDevBackendSymOpInfo: + * + * @session_id: session index which was previously + * created by cryptodev_backend_sym_create_session() + * @aad_len: byte length of additional authenticated data + * @iv_len: byte length of initialization vector or counter + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @digest_result_len: byte length of hash digest result + * @hash_start_src_offset: Starting point for hash processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @cipher_start_src_offset: Starting point for cipher processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @len_to_hash: byte length of source data on which the hash + * operation will be computed, only used for algorithm chain + * @len_to_cipher: byte length of source data on which the cipher + * operation will be computed, only used for algorithm chain + * @op_type: operation type (refer to virtio_crypto.h) + * @iv: point to the initialization vector or counter + * @src: point to the source data + * @dst: point to the destination data + * @aad_data: point to the additional authenticated data + * @digest_result: point to the digest result data + * @data[0]: point to the extensional memory by one memory allocation + * + */ +typedef struct CryptoDevBackendSymOpInfo { + uint64_t session_id; + uint32_t aad_len; + uint32_t iv_len; + uint32_t src_len; + uint32_t dst_len; + uint32_t digest_result_len; + uint32_t hash_start_src_offset; + uint32_t cipher_start_src_offset; + uint32_t len_to_hash; + uint32_t len_to_cipher; + uint8_t op_type; + uint8_t *iv; + uint8_t *src; + uint8_t *dst; + uint8_t *aad_data; + uint8_t *digest_result; + uint8_t data[0]; +} CryptoDevBackendSymOpInfo; + +typedef struct CryptoDevBackendClass { + ObjectClass parent_class; + + void (*init)(CryptoDevBackend *backend, Error **errp); + void (*cleanup)(CryptoDevBackend *backend, Error **errp); + + int64_t (*create_session)(CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + int (*close_session)(CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + int (*do_sym_op)(CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp); +} CryptoDevBackendClass; + + +struct CryptoDevBackendClient { + char *model; + char *name; + char *info_str; + unsigned int queue_index; + QTAILQ_ENTRY(CryptoDevBackendClient) next; +}; + +struct CryptoDevBackendPeers { + CryptoDevBackendClient *ccs[MAX_CRYPTO_QUEUE_NUM]; + uint32_t queues; +}; + +struct CryptoDevBackendConf { + CryptoDevBackendPeers peers; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct CryptoDevBackend { + Object parent_obj; + + bool ready; + CryptoDevBackendConf conf; +}; + +/** + * cryptodev_backend_new_client: + * @model: the cryptodev backend model + * @name: the cryptodev backend name, can be NULL + * + * Creates a new cryptodev backend client object + * with the @name in the model @model. + * + * The returned object must be released with + * cryptodev_backend_free_client() when no + * longer required + * + * Returns: a new cryptodev backend client object + */ +CryptoDevBackendClient * +cryptodev_backend_new_client(const char *model, + const char *name); +/** + * cryptodev_backend_free_client: + * @cc: the cryptodev backend client object + * + * Release the memory associated with @cc that + * was previously allocated by cryptodev_backend_new_client() + */ +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc); + +/** + * cryptodev_backend_cleanup: + * @backend: the cryptodev backend object + * @errp: pointer to a NULL-initialized error object + * + * Clean the resouce associated with @backend that realizaed + * by the specific backend's init() callback + */ +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp); + +/** + * cryptodev_backend_sym_create_session: + * @backend: the cryptodev backend object + * @sess_info: parameters needed by session creating + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Create a session for symmetric algorithms + * + * Returns: session id on success, or -1 on error + */ +int64_t cryptodev_backend_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_sym_close_session: + * @backend: the cryptodev backend object + * @session_id: the session id + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Close a session for symmetric algorithms which was previously + * created by cryptodev_backend_sym_create_session() + * + * Returns: 0 on success, or Negative on error + */ +int cryptodev_backend_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_crypto_operation: + * @backend: the cryptodev backend object + * @opaque: pointer to a VirtIOCryptoReq object + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Do crypto operation, such as encryption and + * decryption + * + * Returns: VIRTIO_CRYPTO_OK on success, + * or -VIRTIO_CRYPTO_* on error + */ +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + void *opaque, + uint32_t queue_index, Error **errp); + +#endif /* CRYPTODEV_H */ diff --git a/main-loop.c b/main-loop.c index 66c4eb69a3..ad10bca211 100644 --- a/main-loop.c +++ b/main-loop.c @@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout) if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { static bool notified; - if (!notified && !qtest_driver()) { + if (!notified && !qtest_enabled() && !qtest_driver()) { fprintf(stderr, "main-loop: WARNING: I/O thread spun for %d iterations\n", MAX_MAIN_LOOP_SPIN); @@ -3955,6 +3955,27 @@ static void monitor_readline_flush(void *opaque) monitor_flush(opaque); } +/* + * Print to current monitor if we have one, else to stderr. + * TODO should return int, so callers can calculate width, but that + * requires surgery to monitor_vprintf(). Left for another day. + */ +void error_vprintf(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } +} + static void __attribute__((constructor)) monitor_lock_init(void) { qemu_mutex_init(&monitor_lock); diff --git a/nbd/client.c b/nbd/client.c index f6db8369b3..7db4301d29 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Client Side @@ -22,23 +23,34 @@ static int nbd_errno_to_system_errno(int err) { + int ret; switch (err) { case NBD_SUCCESS: - return 0; + ret = 0; + break; case NBD_EPERM: - return EPERM; + ret = EPERM; + break; case NBD_EIO: - return EIO; + ret = EIO; + break; case NBD_ENOMEM: - return ENOMEM; + ret = ENOMEM; + break; case NBD_ENOSPC: - return ENOSPC; + ret = ENOSPC; + break; + case NBD_ESHUTDOWN: + ret = ESHUTDOWN; + break; default: TRACE("Squashing unexpected error %d to EINVAL", err); /* fallthrough */ case NBD_EINVAL: - return EINVAL; + ret = EINVAL; + break; } + return ret; } /* Definitions for opaque data types */ @@ -74,64 +86,180 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ +/* Discard length bytes from channel. Return -errno on failure, or + * the amount of bytes consumed. */ +static ssize_t drop_sync(QIOChannel *ioc, size_t size) +{ + ssize_t ret, dropped = size; + char small[1024]; + char *buffer; + + buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); + while (size > 0) { + ret = read_sync(ioc, buffer, MIN(65536, size)); + if (ret < 0) { + goto cleanup; + } + assert(ret <= size); + size -= ret; + } + ret = dropped; + + cleanup: + if (buffer != small) { + g_free(buffer); + } + return ret; +} + +/* Send an option request. + * + * The request is for option @opt, with @data containing @len bytes of + * additional payload for the request (@len may be -1 to treat @data as + * a C string; and @data may be NULL if @len is 0). + * Return 0 if successful, -1 with errp set if it is impossible to + * continue. */ +static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, + uint32_t len, const char *data, + Error **errp) +{ + nbd_option req; + QEMU_BUILD_BUG_ON(sizeof(req) != 16); + + if (len == -1) { + req.length = len = strlen(data); + } + TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len); -/* If type represents success, return 1 without further action. - * If type represents an error reply, consume the rest of the packet on ioc. - * Then return 0 for unsupported (so the client can fall back to - * other approaches), or -1 with errp set for other errors. + stq_be_p(&req.magic, NBD_OPTS_MAGIC); + stl_be_p(&req.option, opt); + stl_be_p(&req.length, len); + + if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) { + error_setg(errp, "Failed to send option request header"); + return -1; + } + + if (len && write_sync(ioc, (char *) data, len) != len) { + error_setg(errp, "Failed to send option request data"); + return -1; + } + + return 0; +} + +/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are + * not going to attempt further negotiation. */ +static void nbd_send_opt_abort(QIOChannel *ioc) +{ + /* Technically, a compliant server is supposed to reply to us; but + * older servers disconnected instead. At any rate, we're allowed + * to disconnect without waiting for the server reply, so we don't + * even care if the request makes it to the server, let alone + * waiting around for whether the server replies. */ + nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL); +} + + +/* Receive the header of an option reply, which should match the given + * opt. Read through the length field, but NOT the length bytes of + * payload. Return 0 if successful, -1 with errp set if it is + * impossible to continue. */ +static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, + nbd_opt_reply *reply, Error **errp) +{ + QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); + if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { + error_setg(errp, "failed to read option reply"); + nbd_send_opt_abort(ioc); + return -1; + } + be64_to_cpus(&reply->magic); + be32_to_cpus(&reply->option); + be32_to_cpus(&reply->type); + be32_to_cpus(&reply->length); + + TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32, + reply->option, reply->type, reply->length); + + if (reply->magic != NBD_REP_MAGIC) { + error_setg(errp, "Unexpected option reply magic"); + nbd_send_opt_abort(ioc); + return -1; + } + if (reply->option != opt) { + error_setg(errp, "Unexpected option type %x expected %x", + reply->option, opt); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; +} + +/* If reply represents success, return 1 without further action. + * If reply represents an error, consume the optional payload of + * the packet on ioc. Then return 0 for unsupported (so the client + * can fall back to other approaches), or -1 with errp set for other + * errors. */ -static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, +static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, Error **errp) { - uint32_t len; char *msg = NULL; int result = -1; - if (!(type & (1 << 31))) { + if (!(reply->type & (1 << 31))) { return 1; } - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); - return -1; - } - len = be32_to_cpu(len); - if (len) { - if (len > NBD_MAX_BUFFER_SIZE) { + if (reply->length) { + if (reply->length > NBD_MAX_BUFFER_SIZE) { error_setg(errp, "server's error message is too long"); goto cleanup; } - msg = g_malloc(len + 1); - if (read_sync(ioc, msg, len) != len) { + msg = g_malloc(reply->length + 1); + if (read_sync(ioc, msg, reply->length) != reply->length) { error_setg(errp, "failed to read option error message"); goto cleanup; } - msg[len] = '\0'; + msg[reply->length] = '\0'; } - switch (type) { + switch (reply->type) { case NBD_REP_ERR_UNSUP: TRACE("server doesn't understand request %" PRIx32 - ", attempting fallback", opt); + ", attempting fallback", reply->option); result = 0; goto cleanup; case NBD_REP_ERR_POLICY: - error_setg(errp, "Denied by server for option %" PRIx32, opt); + error_setg(errp, "Denied by server for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_INVALID: - error_setg(errp, "Invalid data length for option %" PRIx32, opt); + error_setg(errp, "Invalid data length for option %" PRIx32, + reply->option); + break; + + case NBD_REP_ERR_PLATFORM: + error_setg(errp, "Server lacks support for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_TLS_REQD: error_setg(errp, "TLS negotiation required before option %" PRIx32, - opt); + reply->option); + break; + + case NBD_REP_ERR_SHUTDOWN: + error_setg(errp, "Server shutting down before option %" PRIx32, + reply->option); break; default: error_setg(errp, "Unknown error code when asking for option %" PRIx32, - opt); + reply->option); break; } @@ -141,244 +269,160 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, cleanup: g_free(msg); + if (result < 0) { + nbd_send_opt_abort(ioc); + } return result; } -static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) +/* Process another portion of the NBD_OPT_LIST reply. Set *@match if + * the current reply matches @want or if the server does not support + * NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration + * is complete, positive if more replies are expected, or negative + * with @errp set if an unrecoverable error occurred. */ +static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, + Error **errp) { - uint64_t magic; - uint32_t opt; - uint32_t type; + nbd_opt_reply reply; uint32_t len; uint32_t namelen; + char name[NBD_MAX_NAME_SIZE + 1]; int error; - *name = NULL; - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read list option magic"); + if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) { return -1; } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option list magic"); - return -1; - } - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read list option"); - return -1; + error = nbd_handle_reply_err(ioc, &reply, errp); + if (error <= 0) { + /* The server did not support NBD_OPT_LIST, so set *match on + * the assumption that any name will be accepted. */ + *match = true; + return error; } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_LIST) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_LIST); + len = reply.length; + + if (reply.type == NBD_REP_ACK) { + if (len != 0) { + error_setg(errp, "length too long for option end"); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; + } else if (reply.type != NBD_REP_SERVER) { + error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", + reply.type, NBD_REP_SERVER); + nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read list option type"); + if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { + error_setg(errp, "incorrect option length %" PRIu32, len); + nbd_send_opt_abort(ioc); return -1; } - type = be32_to_cpu(type); - error = nbd_handle_reply_err(ioc, opt, type, errp); - if (error <= 0) { - return error; + if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { + error_setg(errp, "failed to read option name length"); + nbd_send_opt_abort(ioc); + return -1; } - - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); + namelen = be32_to_cpu(namelen); + len -= sizeof(namelen); + if (len < namelen) { + error_setg(errp, "incorrect option name length"); + nbd_send_opt_abort(ioc); return -1; } - len = be32_to_cpu(len); - - if (type == NBD_REP_ACK) { - if (len != 0) { - error_setg(errp, "length too long for option end"); - return -1; - } - } else if (type == NBD_REP_SERVER) { - if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { - error_setg(errp, "incorrect option length"); - return -1; - } - if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { - error_setg(errp, "failed to read option name length"); - return -1; - } - namelen = be32_to_cpu(namelen); - len -= sizeof(namelen); - if (len < namelen) { - error_setg(errp, "incorrect option name length"); - return -1; - } - if (namelen > NBD_MAX_NAME_SIZE) { - error_setg(errp, "export name length too long %" PRIu32, namelen); + if (namelen != strlen(want)) { + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to skip export name with wrong length"); + nbd_send_opt_abort(ioc); return -1; } + return 1; + } - *name = g_new0(char, namelen + 1); - if (read_sync(ioc, *name, namelen) != namelen) { - error_setg(errp, "failed to read export name"); - g_free(*name); - *name = NULL; - return -1; - } - (*name)[namelen] = '\0'; - len -= namelen; - if (len) { - char *buf = g_malloc(len + 1); - if (read_sync(ioc, buf, len) != len) { - error_setg(errp, "failed to read export description"); - g_free(*name); - g_free(buf); - *name = NULL; - return -1; - } - buf[len] = '\0'; - TRACE("Ignoring export description: %s", buf); - g_free(buf); - } - } else { - error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", - type, NBD_REP_SERVER); + assert(namelen < sizeof(name)); + if (read_sync(ioc, name, namelen) != namelen) { + error_setg(errp, "failed to read export name"); + nbd_send_opt_abort(ioc); return -1; } + name[namelen] = '\0'; + len -= namelen; + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to read export description"); + nbd_send_opt_abort(ioc); + return -1; + } + if (!strcmp(name, want)) { + *match = true; + } return 1; } +/* Return -1 on failure, 0 if wantname is an available export. */ static int nbd_receive_query_exports(QIOChannel *ioc, const char *wantname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_LIST); - uint32_t length = 0; bool foundExport = false; - TRACE("Querying export list"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send list option magic"); - return -1; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send list option number"); - return -1; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send list option length"); + TRACE("Querying export list for '%s'", wantname); + if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) { return -1; } TRACE("Reading available export names"); while (1) { - char *name = NULL; - int ret = nbd_receive_list(ioc, &name, errp); + int ret = nbd_receive_list(ioc, wantname, &foundExport, errp); if (ret < 0) { - g_free(name); - name = NULL; + /* Server gave unexpected reply */ return -1; + } else if (ret == 0) { + /* Done iterating. */ + if (!foundExport) { + error_setg(errp, "No export with name '%s' available", + wantname); + nbd_send_opt_abort(ioc); + return -1; + } + TRACE("Found desired export name '%s'", wantname); + return 0; } - if (ret == 0) { - /* Server doesn't support export listing, so - * we will just assume an export with our - * wanted name exists */ - foundExport = true; - break; - } - if (name == NULL) { - TRACE("End of export name list"); - break; - } - if (g_str_equal(name, wantname)) { - foundExport = true; - TRACE("Found desired export name '%s'", name); - } else { - TRACE("Ignored export name '%s'", name); - } - g_free(name); - } - - if (!foundExport) { - error_setg(errp, "No export with name '%s' available", wantname); - return -1; } - - return 0; } static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS); - uint32_t length = 0; - uint32_t type; + nbd_opt_reply reply; QIOChannelTLS *tioc; struct NBDTLSHandshakeData data = { 0 }; TRACE("Requesting TLS from server"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send option magic"); - return NULL; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send option number"); - return NULL; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send option length"); - return NULL; - } - - TRACE("Getting TLS reply from server1"); - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read option magic"); - return NULL; - } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option magic"); - return NULL; - } - TRACE("Getting TLS reply from server2"); - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read option"); - return NULL; - } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_STARTTLS) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_STARTTLS); + if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) { return NULL; } TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read option type"); + if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) { return NULL; } - type = be32_to_cpu(type); - if (type != NBD_REP_ACK) { + + if (reply.type != NBD_REP_ACK) { error_setg(errp, "Server rejected request to start TLS %" PRIx32, - type); + reply.type); + nbd_send_opt_abort(ioc); return NULL; } - TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "failed to read option length"); - return NULL; - } - length = be32_to_cpu(length); - if (length != 0) { + if (reply.length != 0) { error_setg(errp, "Start TLS response was not zero %" PRIu32, - length); + reply.length); + nbd_send_opt_abort(ioc); return NULL; } @@ -417,6 +461,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, char buf[256]; uint64_t magic, s; int rc; + bool zeroes = true; TRACE("Receiving negotiation tlscreds=%p hostname=%s.", tlscreds, hostname ? hostname : "<null>"); @@ -466,8 +511,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, if (magic == NBD_OPTS_MAGIC) { uint32_t clientflags = 0; - uint32_t opt; - uint32_t namesize; uint16_t globalflags; bool fixedNewStyle = false; @@ -483,6 +526,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, TRACE("Server supports fixed new style"); clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; } + if (globalflags & NBD_FLAG_NO_ZEROES) { + zeroes = false; + TRACE("Server supports no zeroes"); + clientflags |= NBD_FLAG_C_NO_ZEROES; + } /* client requested flags */ clientflags = cpu_to_be32(clientflags); if (write_sync(ioc, &clientflags, sizeof(clientflags)) != @@ -517,28 +565,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } } - /* write the export name */ - magic = cpu_to_be64(magic); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send export name magic"); - goto fail; - } - opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send export name option number"); - goto fail; - } - namesize = cpu_to_be32(strlen(name)); - if (write_sync(ioc, &namesize, sizeof(namesize)) != - sizeof(namesize)) { - error_setg(errp, "Failed to send export name length"); - goto fail; - } - if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) { - error_setg(errp, "Failed to send export name"); + /* write the export name request */ + if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name, + errp) < 0) { goto fail; } + /* Read the response */ if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { error_setg(errp, "Failed to read export length"); goto fail; @@ -585,7 +618,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (read_sync(ioc, &buf, 124) != 124) { + if (zeroes && drop_sync(ioc, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; } @@ -707,18 +740,20 @@ int nbd_disconnect(int fd) } #endif -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; ssize_t ret; TRACE("Sending request to server: " "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 - ", .type=%" PRIu32 " }", - request->from, request->len, request->handle, request->type); + ", .flags = %" PRIx16 ", .type = %" PRIu16 " }", + request->from, request->len, request->handle, + request->flags, request->type); stl_be_p(buf, NBD_REQUEST_MAGIC); - stl_be_p(buf + 4, request->type); + stw_be_p(buf + 4, request->flags); + stw_be_p(buf + 6, request->type); stq_be_p(buf + 8, request->handle); stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); @@ -735,7 +770,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; @@ -763,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) reply->error = nbd_errno_to_system_errno(reply->error); + if (reply->error == ESHUTDOWN) { + /* This works even on mingw which lacks a native ESHUTDOWN */ + LOG("server shutting down"); + return -EINVAL; + } TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }", magic, reply->error, reply->handle); diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 93a6ca8549..eee20abc25 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -53,16 +53,16 @@ /* This is all part of the "official" NBD API. * * The most up-to-date documentation is available at: - * https://github.com/yoe/nbd/blob/master/doc/proto.txt + * https://github.com/yoe/nbd/blob/master/doc/proto.md */ -#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) +#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 #define NBD_OPTS_MAGIC 0x49484156454F5054LL #define NBD_CLIENT_MAGIC 0x0000420281861253LL -#define NBD_REP_MAGIC 0x3e889045565a9LL +#define NBD_REP_MAGIC 0x0003e889045565a9LL #define NBD_SET_SOCK _IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -92,6 +92,7 @@ #define NBD_ENOMEM 12 #define NBD_EINVAL 22 #define NBD_ENOSPC 28 +#define NBD_ESHUTDOWN 108 static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) { @@ -104,9 +105,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) return nbd_wr_syncv(ioc, &iov, 1, size, true); } -static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size) +static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer, + size_t size) { - struct iovec iov = { .iov_base = buffer, .iov_len = size }; + struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; return nbd_wr_syncv(ioc, &iov, 1, size, false); } diff --git a/nbd/server.c b/nbd/server.c index 36bcafcd50..5b76261666 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Server Side @@ -38,6 +39,8 @@ static int system_errno_to_nbd_errno(int err) case EFBIG: case ENOSPC: return NBD_ENOSPC; + case ESHUTDOWN: + return NBD_ESHUTDOWN; case EINVAL: default: return NBD_EINVAL; @@ -46,10 +49,10 @@ static int system_errno_to_nbd_errno(int err) /* Definitions for opaque data types */ -typedef struct NBDRequest NBDRequest; +typedef struct NBDRequestData NBDRequestData; -struct NBDRequest { - QSIMPLEQ_ENTRY(NBDRequest) entry; +struct NBDRequestData { + QSIMPLEQ_ENTRY(NBDRequestData) entry; NBDClient *client; uint8_t *data; bool complete; @@ -61,6 +64,7 @@ struct NBDExport { BlockBackend *blk; char *name; + char *description; off_t dev_offset; off_t size; uint16_t nbdflags; @@ -79,6 +83,7 @@ struct NBDClient { int refcount; void (*close)(NBDClient *client); + bool no_zeroes; NBDExport *exp; QCryptoTLSCreds *tlscreds; char *tlsaclname; @@ -129,7 +134,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) } -static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) +static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer, + size_t size) { ssize_t ret; guint watch; @@ -193,12 +199,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) */ -static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) +/* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, + uint32_t opt, uint32_t len) { uint64_t magic; - uint32_t len; - TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt); + TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32, + type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { @@ -215,7 +224,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) LOG("write failed (rep type)"); return -EINVAL; } - len = cpu_to_be32(0); + len = cpu_to_be32(len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (rep data length)"); return -EINVAL; @@ -223,45 +232,82 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) return 0; } -static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +/* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) { - uint64_t magic, name_len; - uint32_t opt, type, len; + return nbd_negotiate_send_rep_len(ioc, type, opt, 0); +} - TRACE("Advertising export name '%s'", exp->name ? exp->name : ""); - name_len = strlen(exp->name); - magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("write failed (magic)"); - return -EINVAL; - } - opt = cpu_to_be32(NBD_OPT_LIST); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - LOG("write failed (opt)"); - return -EINVAL; +/* Send an error reply. + * Return -errno on error, 0 on success. */ +static int GCC_FMT_ATTR(4, 5) +nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, + uint32_t opt, const char *fmt, ...) +{ + va_list va; + char *msg; + int ret; + size_t len; + + va_start(va, fmt); + msg = g_strdup_vprintf(fmt, va); + va_end(va); + len = strlen(msg); + assert(len < 4096); + TRACE("sending error message \"%s\"", msg); + ret = nbd_negotiate_send_rep_len(ioc, type, opt, len); + if (ret < 0) { + goto out; } - type = cpu_to_be32(NBD_REP_SERVER); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { - LOG("write failed (reply type)"); - return -EINVAL; + if (nbd_negotiate_write(ioc, msg, len) != len) { + LOG("write failed (error message)"); + ret = -EIO; + } else { + ret = 0; } - len = cpu_to_be32(name_len + sizeof(len)); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); - return -EINVAL; +out: + g_free(msg); + return ret; +} + +/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +{ + size_t name_len, desc_len; + uint32_t len; + const char *name = exp->name ? exp->name : ""; + const char *desc = exp->description ? exp->description : ""; + int rc; + + TRACE("Advertising export name '%s' description '%s'", name, desc); + name_len = strlen(name); + desc_len = strlen(desc); + len = name_len + desc_len + sizeof(len); + rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); + if (rc < 0) { + return rc; } + len = cpu_to_be32(name_len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); + LOG("write failed (name length)"); + return -EINVAL; + } + if (nbd_negotiate_write(ioc, name, name_len) != name_len) { + LOG("write failed (name buffer)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { - LOG("write failed (buffer)"); + if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) { + LOG("write failed (description buffer)"); return -EINVAL; } return 0; } +/* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) { NBDExport *exp; @@ -270,8 +316,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - return nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, NBD_OPT_LIST); + return nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, NBD_OPT_LIST, + "OPT_LIST should not have length"); } /* For each export, send a NBD_REP_SERVER reply. */ @@ -318,7 +365,8 @@ fail: return rc; } - +/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. */ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, uint32_t length) { @@ -332,7 +380,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, if (nbd_negotiate_drop_sync(ioc, length) != length) { return NULL; } - nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); + nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, + "OPT_STARTTLS should not have length"); return NULL; } @@ -371,6 +420,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, } +/* Process all NBD_OPT_* client option commands. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_options(NBDClient *client) { uint32_t flags; @@ -402,6 +453,11 @@ static int nbd_negotiate_options(NBDClient *client) fixedNewstyle = true; flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; } + if (flags & NBD_FLAG_C_NO_ZEROES) { + TRACE("Client supports no zeroes at handshake end"); + client->no_zeroes = true; + flags &= ~NBD_FLAG_C_NO_ZEROES; + } if (flags != 0) { TRACE("Unknown client flags 0x%" PRIx32 " received", flags); return -EIO; @@ -461,16 +517,22 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - TRACE("Option 0x%" PRIx32 " not permitted before TLS", - clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_TLS_REQD, + clientflags, + "Option 0x%" PRIx32 + "not permitted before TLS", + clientflags); if (ret < 0) { return ret; } + /* Let the client keep trying, unless they asked to quit */ + if (clientflags == NBD_OPT_ABORT) { + return -EINVAL; + } break; } } else if (fixedNewstyle) { @@ -483,6 +545,10 @@ static int nbd_negotiate_options(NBDClient *client) break; case NBD_OPT_ABORT: + /* NBD spec says we must try to reply before + * disconnecting, but that we must also tolerate + * guests that don't wait for our reply. */ + nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags); return -EINVAL; case NBD_OPT_EXPORT_NAME: @@ -493,27 +559,30 @@ static int nbd_negotiate_options(NBDClient *client) return -EIO; } if (client->tlscreds) { - TRACE("TLS already enabled"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, + clientflags, + "TLS already enabled"); } else { - TRACE("TLS not configured"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_POLICY, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_POLICY, + clientflags, + "TLS not configured"); } if (ret < 0) { return ret; } break; default: - TRACE("Unsupported option 0x%" PRIx32, clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_UNSUP, + clientflags, + "Unsupported option 0x%" + PRIx32, + clientflags); if (ret < 0) { return ret; } @@ -547,8 +616,10 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) char buf[8 + 8 + 8 + 128]; int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | + NBD_FLAG_SEND_WRITE_ZEROES); bool oldStyle; + size_t len; /* Old style negotiation header without options [ 0 .. 7] passwd ("NBDMAGIC") @@ -565,7 +636,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) ....options sent.... [18 .. 25] size [26 .. 27] export flags - [28 .. 151] reserved (0) + [28 .. 151] reserved (0, omit if no_zeroes) */ qio_channel_set_blocking(client->ioc, false, NULL); @@ -584,7 +655,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stw_be_p(buf + 26, client->exp->nbdflags | myflags); } else { stq_be_p(buf + 8, NBD_OPTS_MAGIC); - stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); + stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); } if (oldStyle) { @@ -611,8 +682,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); - if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != - sizeof(buf) - 18) { + len = client->no_zeroes ? 10 : sizeof(buf) - 18; + if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) { LOG("write failed"); goto fail; } @@ -624,7 +695,7 @@ fail: return rc; } -static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) +static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; @@ -642,21 +713,23 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) /* Request [ 0 .. 3] magic (NBD_REQUEST_MAGIC) - [ 4 .. 7] type (0 == READ, 1 == WRITE) + [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) + [ 6 .. 7] type (NBD_CMD_READ, ...) [ 8 .. 15] handle [16 .. 23] from [24 .. 27] len */ magic = ldl_be_p(buf); - request->type = ldl_be_p(buf + 4); + request->flags = lduw_be_p(buf + 4); + request->type = lduw_be_p(buf + 6); request->handle = ldq_be_p(buf + 8); request->from = ldq_be_p(buf + 16); request->len = ldl_be_p(buf + 24); - TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32 - ", from = %" PRIu64 " , len = %" PRIu32 " }", - magic, request->type, request->from, request->len); + TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16 + ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }", + magic, request->flags, request->type, request->from, request->len); if (magic != NBD_REQUEST_MAGIC) { LOG("invalid magic (got 0x%" PRIx32 ")", magic); @@ -665,7 +738,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) +static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; @@ -747,21 +820,21 @@ static void client_close(NBDClient *client) } } -static NBDRequest *nbd_request_get(NBDClient *client) +static NBDRequestData *nbd_request_get(NBDClient *client) { - NBDRequest *req; + NBDRequestData *req; assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); client->nb_requests++; nbd_update_can_read(client); - req = g_new0(NBDRequest, 1); + req = g_new0(NBDRequestData, 1); nbd_client_get(client); req->client = client; return req; } -static void nbd_request_put(NBDRequest *req) +static void nbd_request_put(NBDRequestData *req) { NBDClient *client = req->client; @@ -894,6 +967,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name) nbd_export_put(exp); } +void nbd_export_set_description(NBDExport *exp, const char *description) +{ + g_free(exp->description); + exp->description = g_strdup(description); +} + void nbd_export_close(NBDExport *exp) { NBDClient *client, *next; @@ -903,6 +982,7 @@ void nbd_export_close(NBDExport *exp) client_close(client); } nbd_export_set_name(exp, NULL); + nbd_export_set_description(exp, NULL); nbd_export_put(exp); } @@ -921,6 +1001,7 @@ void nbd_export_put(NBDExport *exp) if (--exp->refcount == 0) { assert(exp->name == NULL); + assert(exp->description == NULL); if (exp->close) { exp->close(exp); @@ -955,7 +1036,7 @@ void nbd_export_close_all(void) } } -static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, +static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; @@ -991,11 +1072,10 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, * and any other negative value to report an error to the client * (although the caller may still need to disconnect after reporting * the error). */ -static ssize_t nbd_co_receive_request(NBDRequest *req, - struct nbd_request *request) +static ssize_t nbd_co_receive_request(NBDRequestData *req, + NBDRequest *request) { NBDClient *client = req->client; - uint32_t command; ssize_t rc; g_assert(qemu_in_coroutine()); @@ -1012,13 +1092,12 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, TRACE("Decoding type"); - command = request->type & NBD_CMD_MASK_COMMAND; - if (command != NBD_CMD_WRITE) { + if (request->type != NBD_CMD_WRITE) { /* No payload, we are ready to read the next request. */ req->complete = true; } - if (command == NBD_CMD_DISC) { + if (request->type == NBD_CMD_DISC) { /* Special case: we're going to disconnect without a reply, * whether or not flags, from, or len are bogus */ TRACE("Request type is DISCONNECT"); @@ -1035,7 +1114,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); @@ -1049,7 +1128,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } } - if (command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); if (read_sync(client->ioc, req->data, request->len) != request->len) { @@ -1065,12 +1144,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 ", Size: %" PRIu64, request->from, request->len, (uint64_t)client->exp->size); - rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + goto out; + } + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { + LOG("unsupported flags (got 0x%x)", request->flags); + rc = -EINVAL; goto out; } - if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) { - LOG("unsupported flags (got 0x%x)", - request->type & ~NBD_CMD_MASK_COMMAND); + if (request->type != NBD_CMD_WRITE_ZEROES && + (request->flags & NBD_CMD_FLAG_NO_HOLE)) { + LOG("unexpected flags (got 0x%x)", request->flags); rc = -EINVAL; goto out; } @@ -1088,11 +1172,10 @@ static void nbd_trip(void *opaque) { NBDClient *client = opaque; NBDExport *exp = client->exp; - NBDRequest *req; - struct nbd_request request; - struct nbd_reply reply; + NBDRequestData *req; + NBDRequest request; + NBDReply reply; ssize_t ret; - uint32_t command; int flags; TRACE("Reading request."); @@ -1116,7 +1199,6 @@ static void nbd_trip(void *opaque) reply.error = -ret; goto error_reply; } - command = request.type & NBD_CMD_MASK_COMMAND; if (client->closing) { /* @@ -1126,11 +1208,12 @@ static void nbd_trip(void *opaque) goto done; } - switch (command) { + switch (request.type) { case NBD_CMD_READ: TRACE("Request type is READ"); - if (request.type & NBD_CMD_FLAG_FUA) { + /* XXX: NBD Protocol only documents use of FUA with WRITE */ + if (request.flags & NBD_CMD_FLAG_FUA) { ret = blk_co_flush(exp->blk); if (ret < 0) { LOG("flush failed"); @@ -1163,7 +1246,7 @@ static void nbd_trip(void *opaque) TRACE("Writing to device"); flags = 0; - if (request.type & NBD_CMD_FLAG_FUA) { + if (request.flags & NBD_CMD_FLAG_FUA) { flags |= BDRV_REQ_FUA; } ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, @@ -1179,6 +1262,37 @@ static void nbd_trip(void *opaque) } break; + case NBD_CMD_WRITE_ZEROES: + TRACE("Request type is WRITE_ZEROES"); + + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { + TRACE("Server is read-only, return error"); + reply.error = EROFS; + goto error_reply; + } + + TRACE("Writing to device"); + + flags = 0; + if (request.flags & NBD_CMD_FLAG_FUA) { + flags |= BDRV_REQ_FUA; + } + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { + flags |= BDRV_REQ_MAY_UNMAP; + } + ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, + request.len, flags); + if (ret < 0) { + LOG("writing to file failed"); + reply.error = -ret; + goto error_reply; + } + + if (nbd_co_send_reply(req, &reply, 0) < 0) { + goto out; + } + break; + case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); diff --git a/net/slirp.c b/net/slirp.c index 64dd3255ae..bcd1c5f57d 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str, return -1; } - if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd), - &server, port) < 0) { + if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) { error_report("conflicting/invalid host:port in guest forwarding " "rule '%s'", config_str); g_free(fwd); diff --git a/qapi-schema.json b/qapi-schema.json index 5dc96af469..b0b4bf64cc 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4621,10 +4621,10 @@ # # @pause: system pauses # -# Since: 2.1 +# Since: 2.1 (poweroff since 2.8) ## { 'enum': 'GuestPanicAction', - 'data': [ 'pause' ] } + 'data': [ 'pause', 'poweroff' ] } ## # @rtc-reset-reinjection diff --git a/qemu-char.c b/qemu-char.c index 1e5a0e8cb9..2c9940cea4 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -735,19 +735,23 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size) } } +static bool muxes_realized; + static void mux_chr_event(void *opaque, int event) { CharDriverState *chr = opaque; MuxDriver *d = chr->opaque; int i; + if (!muxes_realized) { + return; + } + /* Send the event to all registered listeners */ for (i = 0; i < d->mux_cnt; i++) mux_chr_send_event(d, i, event); } -static bool muxes_realized; - /** * Called after processing of default and command-line-specified * chardevs to deliver CHR_EVENT_OPENED events to any FEs attached diff --git a/qemu-nbd.c b/qemu-nbd.c index b757dc7621..c734f627b4 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -83,6 +83,7 @@ static void usage(const char *name) " -t, --persistent don't exit on the last connection\n" " -v, --verbose display extra debugging information\n" " -x, --export-name=NAME expose export by name\n" +" -D, --description=TEXT with -x, also export a human-readable description\n" "\n" "Exposing part of the image:\n" " -o, --offset=OFFSET offset into the image\n" @@ -477,7 +478,7 @@ int main(int argc, char **argv) off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; - const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:"; + const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:"; struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -503,6 +504,7 @@ int main(int argc, char **argv) { "verbose", no_argument, NULL, 'v' }, { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT }, { "export-name", required_argument, NULL, 'x' }, + { "description", required_argument, NULL, 'D' }, { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS }, { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS }, { "trace", required_argument, NULL, 'T' }, @@ -524,6 +526,7 @@ int main(int argc, char **argv) BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; QDict *options = NULL; const char *export_name = NULL; + const char *export_description = NULL; const char *tlscredsid = NULL; bool imageOpts = false; bool writethrough = true; @@ -689,6 +692,9 @@ int main(int argc, char **argv) case 'x': export_name = optarg; break; + case 'D': + export_description = optarg; + break; case 'v': verbose = 1; break; @@ -937,7 +943,11 @@ int main(int argc, char **argv) } if (export_name) { nbd_export_set_name(exp, export_name); + nbd_export_set_description(exp, export_description); newproto = true; + } else if (export_description) { + error_report("Export description requires an export name"); + exit(EXIT_FAILURE); } server_ioc = qio_channel_socket_new(); diff --git a/qemu-nbd.texi b/qemu-nbd.texi index b7a9c6d02f..9a84e81eed 100644 --- a/qemu-nbd.texi +++ b/qemu-nbd.texi @@ -79,9 +79,12 @@ Disconnect the device @var{dev} Allow up to @var{num} clients to share the device (default @samp{1}) @item -t, --persistent Don't exit on the last connection -@item -x NAME, --export-name=NAME +@item -x, --export-name=@var{name} Set the NBD volume export name. This switches the server to use the new style NBD protocol negotiation +@item -D, --description=@var{description} +Set the NBD volume export description, as a human-readable +string. Requires the use of @option{-x} @item --tls-creds=ID Enable mandatory TLS encryption for the server by setting the ID of the TLS credentials object previously created with the --object diff --git a/qemu-options.hx b/qemu-options.hx index 95332cc05b..4536e18ac0 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3948,6 +3948,24 @@ secondary: If you want to know the detail of above command line, you can read the colo-compare git log. +@item -object cryptodev-backend-builtin,id=@var{id}[,queues=@var{queues}] + +Creates a cryptodev backend which executes crypto opreation from +the QEMU cipher APIS. The @var{id} parameter is +a unique ID that will be used to reference this cryptodev backend from +the @option{virtio-crypto} device. The @var{queues} parameter is optional, +which specify the queue number of cryptodev backend, the default of +@var{queues} is 1. + +@example + + # qemu-system-x86_64 \ + [...] \ + -object cryptodev-backend-builtin,id=cryptodev0 \ + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ + [...] +@end example + @item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] @item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3afa19a766..f084542934 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1754,7 +1754,7 @@ sub process { # Ignore those directives where spaces _are_ permitted. if ($name =~ /^(?: if|for|while|switch|return|case| - volatile|__volatile__| + volatile|__volatile__|coroutine_fn| __attribute__|format|__extension__| asm|__asm__)$/x) { @@ -2498,8 +2498,8 @@ sub process { VMStateDescription| VMStateInfo}x; if ($line !~ /\bconst\b/ && - $line =~ /\b($struct_ops)\b/) { - ERROR("struct $1 should normally be const\n" . + $line =~ /\b($struct_ops)\b.*=/) { + ERROR("initializer for struct $1 should normally be const\n" . $herecurr); } diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 7f236a7c1f..2b5bb74fce 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -9,6 +9,7 @@ stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o stub-obj-y += dump.o +stub-obj-y += error-printf.o stub-obj-y += fdset-add-fd.o stub-obj-y += fdset-find-fd.o stub-obj-y += fdset-get-fd.o @@ -23,7 +24,6 @@ stub-obj-y += is-daemonized.o stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o stub-obj-y += mon-is-qmp.o -stub-obj-y += mon-printf.o stub-obj-y += monitor-init.o stub-obj-y += notify-event.o stub-obj-y += qtest.o diff --git a/stubs/error-printf.c b/stubs/error-printf.c new file mode 100644 index 0000000000..ac6b92aa69 --- /dev/null +++ b/stubs/error-printf.c @@ -0,0 +1,19 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" + +void error_vprintf(const char *fmt, va_list ap) +{ + if (g_test_initialized() && !g_test_subprocess()) { + char *msg = g_strdup_vprintf(fmt, ap); + g_test_message("%s", msg); + g_free(msg); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + error_vprintf(fmt, ap); +} diff --git a/stubs/mon-printf.c b/stubs/mon-printf.c deleted file mode 100644 index e7c1e0cf74..0000000000 --- a/stubs/mon-printf.c +++ /dev/null @@ -1,11 +0,0 @@ -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "monitor/monitor.h" - -void monitor_printf(Monitor *mon, const char *fmt, ...) -{ -} - -void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) -{ -} diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f8a8fbd3b..14c5186fe7 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_reg = R_ECX, .tcg_features = TCG_7_0_ECX_FEATURES, }, + [FEAT_7_0_EDX] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid_eax = 7, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EDX, + .tcg_features = TCG_7_0_EDX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .feat_names = { NULL, NULL, NULL, NULL, @@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { *ecx |= CPUID_7_0_ECX_OSPKE; } - *edx = 0; /* Reserved */ + *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else { *eax = 0; *ebx = 0; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 6303d6593d..c605724022 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -443,6 +443,7 @@ typedef enum FeatureWord { FEAT_1_ECX, /* CPUID[1].ECX */ FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ + FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ @@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_OSPKE (1U << 4) #define CPUID_7_0_ECX_RDPID (1U << 22) +#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ +#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c index ad1e97e91e..e84dd6889b 100644 --- a/tests/ipmi-bt-test.c +++ b/tests/ipmi-bt-test.c @@ -309,7 +309,7 @@ static void test_connect(void) uint8_t msg[100]; unsigned int msglen; static uint8_t exp1[] = { 0xff, 0x01, 0xa1 }; /* A protocol version */ - static uint8_t exp2[] = { 0x08, 0x1f, 0xa1 }; /* A capabilities cmd */ + static uint8_t exp2[] = { 0x08, 0x3f, 0xa1 }; /* A capabilities cmd */ FD_ZERO(&readfds); FD_SET(emu_lfd, &readfds); diff --git a/util/qemu-error.c b/util/qemu-error.c index 1ef35664af..b331f8f4a4 100644 --- a/util/qemu-error.c +++ b/util/qemu-error.c @@ -14,24 +14,6 @@ #include "monitor/monitor.h" #include "qemu/error-report.h" -/* - * Print to current monitor if we have one, else to stderr. - * TODO should return int, so callers can calculate width, but that - * requires surgery to monitor_vprintf(). Left for another day. - */ -void error_vprintf(const char *fmt, va_list ap) -{ - if (cur_mon && !monitor_cur_is_qmp()) { - monitor_vprintf(cur_mon, fmt, ap); - } else { - vfprintf(stderr, fmt, ap); - } -} - -/* - * Print to current monitor if we have one, else to stderr. - * TODO just like error_vprintf() - */ void error_printf(const char *fmt, ...) { va_list ap; @@ -45,11 +27,9 @@ void error_printf_unless_qmp(const char *fmt, ...) { va_list ap; - if (!monitor_cur_is_qmp()) { - va_start(ap, fmt); - error_vprintf(fmt, ap); - va_end(ap); - } + va_start(ap, fmt); + error_vprintf_unless_qmp(fmt, ap); + va_end(ap); } static Location std_loc = { @@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void) } qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort); vm_stop(RUN_STATE_GUEST_PANICKED); + if (!no_shutdown) { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, + &error_abort); + qemu_system_shutdown_request(); + } } void qemu_system_reset_request(void) |