author    Peter Maydell <peter.maydell@linaro.org>  2021-06-30 21:09:27 +0100
committer Peter Maydell <peter.maydell@linaro.org>  2021-06-30 21:09:27 +0100
commit    1ec2cd0ce2ca94292ce237becc2c21b4eb9edca0 (patch)
tree      2c2d10818a01841b37a2c0be156a4f56c91cc022
parent    d940d468e29bff5eb5669c0dd8f3de0c3de17bfb (diff)
parent    176c0a4973d3ca5d46b05d0edb439b154363d29f (diff)
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging
hw/nvme patches

* namespace eui64 support (Heinrich)
* aiocb refactoring (Klaus)
* controller parameter for auto zone transitioning (Niklas)
* misc fixes and additions (Gollu, Klaus, Keith)

# gpg: Signature made Tue 29 Jun 2021 19:46:55 BST
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9

* remotes/nvme/tags/nvme-next-pull-request: (23 commits)
  hw/nvme: add 'zoned.zasl' to documentation
  hw/nvme: fix pin-based interrupt behavior (again)
  hw/nvme: fix missing check for PMR capability
  hw/nvme: documentation fix
  hw/nvme: fix endianess conversion and add controller list
  Partially revert "hw/block/nvme: drain namespaces on sq deletion"
  hw/nvme: reimplement format nvm to allow cancellation
  hw/nvme: reimplement zone reset to allow cancellation
  hw/nvme: reimplement the copy command to allow aio cancellation
  hw/nvme: add dw0/1 to the req completion trace event
  hw/nvme: use prinfo directly in nvme_check_prinfo and nvme_dif_check
  hw/nvme: remove assert from nvme_get_zone_by_slba
  hw/nvme: save reftag when generating pi
  hw/nvme: reimplement dsm to allow cancellation
  hw/nvme: add nvme_block_status_all helper
  hw/nvme: reimplement flush to allow cancellation
  hw/nvme: default for namespace EUI-64
  hw/nvme: namespace parameter for EUI-64
  hw/nvme: fix csi field for cns 0x00 and 0x11
  hw/nvme: add param to control auto zone transitioning to zone state closed
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
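As a quick orientation, a minimal sketch of a command line exercising the user-visible parameters introduced in this pull request (the image name, serial and EUI-64 value are illustrative, not taken from the series):

    qemu-system-x86_64 \
        -drive file=nvm.img,if=none,id=nvm0 \
        -device nvme,serial=deadbeef,zoned.auto_transition=off \
        -device nvme-ns,drive=nvm0,eui64=0x5254000012345678

Note that zoned.auto_transition only has an effect once a zoned namespace is attached to the controller.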
-rw-r--r--  docs/system/nvme.rst |   12
-rw-r--r--  hw/core/machine.c    |    1
-rw-r--r--  hw/nvme/ctrl.c       | 1951
-rw-r--r--  hw/nvme/dif.c        |   64
-rw-r--r--  hw/nvme/ns.c         |   62
-rw-r--r--  hw/nvme/nvme.h       |   15
-rw-r--r--  hw/nvme/trace-events |   23
-rw-r--r--  include/block/nvme.h |   18
8 files changed, 1242 insertions, 904 deletions
diff --git a/docs/system/nvme.rst b/docs/system/nvme.rst
index f7f63d6bf6..bff72d1c24 100644
--- a/docs/system/nvme.rst
+++ b/docs/system/nvme.rst
@@ -81,6 +81,12 @@ There are a number of parameters available:
Set the UUID of the namespace. This will be reported as a "Namespace UUID"
descriptor in the Namespace Identification Descriptor List.
+``eui64``
+ Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended
+ Unique Identifier" descriptor in the Namespace Identification Descriptor List.
+ Since machine type 6.1 a non-zero default value is used if the parameter
+ is not provided. For earlier machine types the field defaults to 0.
+
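For example, a minimal sketch of setting the parameter explicitly (drive id and value are illustrative):

    -device nvme-ns,drive=nvm0,eui64=0x5254000012345678

Pinning the value like this keeps the guest-visible identifier stable regardless of machine type.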
``bus``
If there are more ``nvme`` devices defined, this parameter may be used to
attach the namespace to a specific ``nvme`` device (identified by an ``id``
@@ -196,6 +202,12 @@ The namespace may be configured with additional parameters
allows all zones to be open. If ``zoned.max_active`` is specified, this value
must be less than or equal to that.
+``zoned.zasl=UINT8`` (default: ``0``)
+ Set the maximum data transfer size for the Zone Append command. Like
+ ``mdts``, the value is specified as a power of two (2^n) and is in units of
+ the minimum memory page size (CAP.MPSMIN). The default value (``0``)
+ has this property inherit the ``mdts`` value.
+
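As a worked sketch of the encoding (assuming CAP.MPSMIN corresponds to 4 KiB pages):

    -device nvme,serial=deadbeef,zoned.zasl=5    # 4 KiB * 2^5 = 128 KiB per Zone Append

With the default ``zoned.zasl=0``, the Zone Append limit simply follows ``mdts``.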
Metadata
--------
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ffc076ae84..ca69f0343a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -39,6 +39,7 @@
GlobalProperty hw_compat_6_0[] = {
{ "gpex-pcihost", "allow-unmapped-accesses", "false" },
{ "i8042", "extended-state", "false"},
+ { "nvme-ns", "eui64-default", "off"},
};
const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0);
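The compat entry above is what keeps guests started with a pre-6.1 machine type on the old all-zeroes default; a sketch of the two behaviors (machine type and drive names are illustrative of the mechanism):

    qemu-system-x86_64 -M pc-q35-6.0 -device nvme-ns,drive=nvm0 ...   # eui64 stays 0 unless set
    qemu-system-x86_64 -M q35        -device nvme-ns,drive=nvm0 ...   # 6.1+: non-zero default generated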
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0bcaf7192f..629b0d38c2 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -34,6 +34,7 @@
* aerl=<N[optional]>,aer_max_queued=<N[optional]>, \
* mdts=<N[optional]>,vsl=<N[optional]>, \
* zoned.zasl=<N[optional]>, \
+ * zoned.auto_transition=<on|off[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -100,6 +101,11 @@
* the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
* defaulting to the value of `mdts`).
*
+ * - `zoned.auto_transition`
+ * Indicates if zones in zone state implicitly opened can be automatically
+ * transitioned to zone state closed for resource management purposes.
+ * Defaults to 'on'.
+ *
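+ *   An illustrative sketch (the serial is hypothetical); hosts that prefer
+ *   to manage open/active zone resources themselves can turn the automatic
+ *   transition off:
+ *
+ *     -device nvme,serial=deadbeef,zoned.auto_transition=off
+ *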
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `shared`
@@ -114,7 +120,7 @@
* This parameter is only valid together with the `subsys` parameter. If left
* at the default value (`false/off`), the namespace will be attached to all
* controllers in the NVMe subsystem at boot-up. If set to `true/on`, the
- * namespace will be be available in the subsystem not not attached to any
+ * namespace will be available in the subsystem but not attached to any
* controllers.
*
* Setting `zoned` to true selects Zoned Command Set at the namespace.
@@ -467,7 +473,9 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
return;
} else {
assert(cq->vector < 32);
- n->irq_status &= ~(1 << cq->vector);
+ if (!n->cq_pending) {
+ n->irq_status &= ~(1 << cq->vector);
+ }
nvme_irq_check(n);
}
}
@@ -1004,16 +1012,12 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
- uint16_t ctrl = le16_to_cpu(rw->control);
+ bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
+ bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
size_t len = nvme_l2b(ns, nlb);
uint16_t status;
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
- (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) {
- goto out;
- }
-
- if (nvme_ns_ext(ns)) {
+ if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
NvmeSg sg;
len += nvme_m2b(ns, nlb);
@@ -1030,7 +1034,6 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
return NVME_SUCCESS;
}
-out:
return nvme_map_dptr(n, &req->sg, len, &req->cmd);
}
@@ -1189,10 +1192,10 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
{
NvmeNamespace *ns = req->ns;
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
- uint16_t ctrl = le16_to_cpu(rw->control);
+ bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
+ bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
- if (nvme_ns_ext(ns) &&
- !(ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) {
+ if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
ns->lbaf.ms, 0, dir);
}
@@ -1252,6 +1255,7 @@ static void nvme_post_cqes(void *opaque)
NvmeCQueue *cq = opaque;
NvmeCtrl *n = cq->ctrl;
NvmeRequest *req, *next;
+ bool pending = cq->head != cq->tail;
int ret;
QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
@@ -1281,6 +1285,10 @@ static void nvme_post_cqes(void *opaque)
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
+ if (cq->irq_enabled && !pending) {
+ n->cq_pending++;
+ }
+
nvme_irq_assert(n, cq);
}
}
@@ -1289,6 +1297,8 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
assert(cq->cqid == req->sq->cqid);
trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
+ le32_to_cpu(req->cqe.result),
+ le32_to_cpu(req->cqe.dw1),
req->status);
if (req->status) {
@@ -1432,18 +1442,15 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
-static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
- uint32_t nlb)
+static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb, int flags)
{
BlockDriverState *bs = blk_bs(ns->blkconf.blk);
int64_t pnum = 0, bytes = nvme_l2b(ns, nlb);
int64_t offset = nvme_l2b(ns, slba);
- bool zeroed;
int ret;
- Error *local_err = NULL;
-
/*
* `pnum` holds the number of bytes after offset that shares the same
* allocation status as the byte at offset. If `pnum` is different from
@@ -1455,23 +1462,41 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
if (ret < 0) {
- error_setg_errno(&local_err, -ret, "unable to get block status");
- error_report_err(local_err);
-
- return NVME_INTERNAL_DEV_ERROR;
+ return ret;
}
- zeroed = !!(ret & BDRV_BLOCK_ZERO);
- trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);
+ trace_pci_nvme_block_status(offset, bytes, pnum, ret,
+ !!(ret & BDRV_BLOCK_ZERO));
- if (zeroed) {
- return NVME_DULB;
+ if (!(ret & flags)) {
+ return 1;
}
offset += pnum;
} while (pnum != bytes);
+ return 0;
+}
+
+static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ int ret;
+ Error *err = NULL;
+
+ ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA);
+ if (ret) {
+ if (ret < 0) {
+ error_setg_errno(&err, -ret, "unable to get block status");
+ error_report_err(err);
+
+ return NVME_INTERNAL_DEV_ERROR;
+ }
+
+ return NVME_DULB;
+ }
+
return NVME_SUCCESS;
}
@@ -1521,7 +1546,10 @@ static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
{
uint32_t zone_idx = nvme_zone_idx(ns, slba);
- assert(zone_idx < ns->num_zones);
+ if (zone_idx >= ns->num_zones) {
+ return NULL;
+ }
+
return &ns->zone_array[zone_idx];
}
@@ -1598,11 +1626,16 @@ static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
uint32_t nlb)
{
- NvmeZone *zone = nvme_get_zone_by_slba(ns, slba);
- uint64_t bndry = nvme_zone_rd_boundary(ns, zone);
- uint64_t end = slba + nlb;
+ NvmeZone *zone;
+ uint64_t bndry, end;
uint16_t status;
+ zone = nvme_get_zone_by_slba(ns, slba);
+ assert(zone);
+
+ bndry = nvme_zone_rd_boundary(ns, zone);
+ end = slba + nlb;
+
status = nvme_check_zone_state_for_read(zone);
if (status) {
;
@@ -1665,6 +1698,29 @@ static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
}
}
+static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
+{
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_FULL:
+ zone->w_ptr = zone->d.zslba;
+ zone->d.wp = zone->w_ptr;
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
+ /* fallthrough */
+ case NVME_ZONE_STATE_EMPTY:
+ return NVME_SUCCESS;
+
+ default:
+ return NVME_ZONE_INVAL_TRANSITION;
+ }
+}
+
static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
{
NvmeZone *zone;
@@ -1686,8 +1742,8 @@ enum {
NVME_ZRM_AUTO = 1 << 0,
};
-static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone,
- int flags)
+static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, int flags)
{
int act = 0;
uint16_t status;
@@ -1699,7 +1755,9 @@ static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone,
/* fallthrough */
case NVME_ZONE_STATE_CLOSED:
- nvme_zrm_auto_transition_zone(ns);
+ if (n->params.auto_transition_zones) {
+ nvme_zrm_auto_transition_zone(ns);
+ }
status = nvme_aor_check(ns, act, 1);
if (status) {
return status;
@@ -1735,14 +1793,16 @@ static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone,
}
}
-static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone)
+static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone)
{
- return nvme_zrm_open_flags(ns, zone, NVME_ZRM_AUTO);
+ return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO);
}
-static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone)
+static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone)
{
- return nvme_zrm_open_flags(ns, zone, 0);
+ return nvme_zrm_open_flags(n, ns, zone, 0);
}
static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
@@ -1765,6 +1825,7 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
slba = le64_to_cpu(rw->slba);
nlb = le16_to_cpu(rw->nlb) + 1;
zone = nvme_get_zone_by_slba(ns, slba);
+ assert(zone);
nvme_advance_zone_wp(ns, zone, nlb);
}
@@ -1778,22 +1839,19 @@ static inline bool nvme_is_write(NvmeRequest *req)
rw->opcode == NVME_CMD_WRITE_ZEROES;
}
+static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
+{
+ return qemu_get_aio_context();
+}
+
static void nvme_misc_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
- NvmeNamespace *ns = req->ns;
- BlockBackend *blk = ns->blkconf.blk;
- BlockAcctCookie *acct = &req->acct;
- BlockAcctStats *stats = blk_get_stats(blk);
-
- trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk));
+ trace_pci_nvme_misc_cb(nvme_cid(req));
if (ret) {
- block_acct_failed(stats, acct);
nvme_aio_err(req, ret);
- } else {
- block_acct_done(stats, acct);
}
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -1873,77 +1931,6 @@ out:
nvme_rw_complete_cb(req, ret);
}
-struct nvme_aio_format_ctx {
- NvmeRequest *req;
- NvmeNamespace *ns;
-
- /* number of outstanding write zeroes for this namespace */
- int *count;
-};
-
-static void nvme_aio_format_cb(void *opaque, int ret)
-{
- struct nvme_aio_format_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- NvmeNamespace *ns = ctx->ns;
- uintptr_t *num_formats = (uintptr_t *)&req->opaque;
- int *count = ctx->count;
-
- g_free(ctx);
-
- if (ret) {
- nvme_aio_err(req, ret);
- }
-
- if (--(*count)) {
- return;
- }
-
- g_free(count);
- ns->status = 0x0;
-
- if (--(*num_formats)) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-struct nvme_aio_flush_ctx {
- NvmeRequest *req;
- NvmeNamespace *ns;
- BlockAcctCookie acct;
-};
-
-static void nvme_aio_flush_cb(void *opaque, int ret)
-{
- struct nvme_aio_flush_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
-
- BlockBackend *blk = ctx->ns->blkconf.blk;
- BlockAcctCookie *acct = &ctx->acct;
- BlockAcctStats *stats = blk_get_stats(blk);
-
- trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk));
-
- if (!ret) {
- block_acct_done(stats, acct);
- } else {
- block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
- }
-
- (*num_flushes)--;
- g_free(ctx);
-
- if (*num_flushes) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
static void nvme_verify_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
@@ -1954,14 +1941,13 @@ static void nvme_verify_cb(void *opaque, int ret)
BlockAcctStats *stats = blk_get_stats(blk);
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
uint64_t slba = le64_to_cpu(rw->slba);
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
uint16_t status;
- trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
- appmask, reftag);
+ trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
if (ret) {
block_acct_failed(stats, acct);
@@ -1981,7 +1967,7 @@ static void nvme_verify_cb(void *opaque, int ret)
req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
ctx->mdata.bounce, ctx->mdata.iov.size,
- ctrl, slba, apptag, appmask, reftag);
+ prinfo, slba, apptag, appmask, &reftag);
}
out:
@@ -2028,326 +2014,6 @@ out:
nvme_verify_cb(ctx, ret);
}
-static void nvme_aio_discard_cb(void *opaque, int ret)
-{
- NvmeRequest *req = opaque;
- uintptr_t *discards = (uintptr_t *)&req->opaque;
-
- trace_pci_nvme_aio_discard_cb(nvme_cid(req));
-
- if (ret) {
- nvme_aio_err(req, ret);
- }
-
- (*discards)--;
-
- if (*discards) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-struct nvme_zone_reset_ctx {
- NvmeRequest *req;
- NvmeZone *zone;
-};
-
-static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret)
-{
- struct nvme_zone_reset_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- NvmeNamespace *ns = req->ns;
- NvmeZone *zone = ctx->zone;
- uintptr_t *resets = (uintptr_t *)&req->opaque;
-
- if (ret) {
- nvme_aio_err(req, ret);
- goto out;
- }
-
- switch (nvme_get_zone_state(zone)) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- zone->w_ptr = zone->d.zslba;
- zone->d.wp = zone->w_ptr;
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
- /* fall through */
- default:
- break;
- }
-
-out:
- g_free(ctx);
-
- (*resets)--;
-
- if (*resets) {
- return;
- }
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-static void nvme_aio_zone_reset_cb(void *opaque, int ret)
-{
- struct nvme_zone_reset_ctx *ctx = opaque;
- NvmeRequest *req = ctx->req;
- NvmeNamespace *ns = req->ns;
- NvmeZone *zone = ctx->zone;
-
- trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);
-
- if (ret) {
- goto out;
- }
-
- if (ns->lbaf.ms) {
- int64_t offset = nvme_moff(ns, zone->d.zslba);
-
- blk_aio_pwrite_zeroes(ns->blkconf.blk, offset,
- nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
- nvme_aio_zone_reset_complete_cb, ctx);
- return;
- }
-
-out:
- nvme_aio_zone_reset_complete_cb(opaque, ret);
-}
-
-struct nvme_copy_ctx {
- int copies;
- uint8_t *bounce;
- uint8_t *mbounce;
- uint32_t nlb;
- NvmeCopySourceRange *ranges;
-};
-
-struct nvme_copy_in_ctx {
- NvmeRequest *req;
- QEMUIOVector iov;
- NvmeCopySourceRange *range;
-};
-
-static void nvme_copy_complete_cb(void *opaque, int ret)
-{
- NvmeRequest *req = opaque;
- NvmeNamespace *ns = req->ns;
- struct nvme_copy_ctx *ctx = req->opaque;
-
- if (ret) {
- block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
- nvme_aio_err(req, ret);
- goto out;
- }
-
- block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
-
-out:
- if (ns->params.zoned) {
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- uint64_t sdlba = le64_to_cpu(copy->sdlba);
- NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
-
- nvme_advance_zone_wp(ns, zone, ctx->nlb);
- }
-
- g_free(ctx->bounce);
- g_free(ctx->mbounce);
- g_free(ctx);
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-static void nvme_copy_cb(void *opaque, int ret)
-{
- NvmeRequest *req = opaque;
- NvmeNamespace *ns = req->ns;
- struct nvme_copy_ctx *ctx = req->opaque;
-
- trace_pci_nvme_copy_cb(nvme_cid(req));
-
- if (ret) {
- goto out;
- }
-
- if (ns->lbaf.ms) {
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- uint64_t sdlba = le64_to_cpu(copy->sdlba);
- int64_t offset = nvme_moff(ns, sdlba);
-
- qemu_iovec_reset(&req->sg.iov);
- qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb));
-
- req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0,
- nvme_copy_complete_cb, req);
- return;
- }
-
-out:
- nvme_copy_complete_cb(opaque, ret);
-}
-
-static void nvme_copy_in_complete(NvmeRequest *req)
-{
- NvmeNamespace *ns = req->ns;
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- struct nvme_copy_ctx *ctx = req->opaque;
- uint64_t sdlba = le64_to_cpu(copy->sdlba);
- uint16_t status;
-
- trace_pci_nvme_copy_in_complete(nvme_cid(req));
-
- block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
-
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
- uint16_t prinfor = (copy->control[0] >> 4) & 0xf;
- uint16_t prinfow = (copy->control[2] >> 2) & 0xf;
- uint16_t nr = copy->nr + 1;
- NvmeCopySourceRange *range;
- uint64_t slba;
- uint32_t nlb;
- uint16_t apptag, appmask;
- uint32_t reftag;
- uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce;
- size_t len, mlen;
- int i;
-
- /*
- * The dif helpers expects prinfo to be similar to the control field of
- * the NvmeRwCmd, so shift by 10 to fake it.
- */
- prinfor = prinfor << 10;
- prinfow = prinfow << 10;
-
- for (i = 0; i < nr; i++) {
- range = &ctx->ranges[i];
- slba = le64_to_cpu(range->slba);
- nlb = le16_to_cpu(range->nlb) + 1;
- len = nvme_l2b(ns, nlb);
- mlen = nvme_m2b(ns, nlb);
- apptag = le16_to_cpu(range->apptag);
- appmask = le16_to_cpu(range->appmask);
- reftag = le32_to_cpu(range->reftag);
-
- status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba,
- apptag, appmask, reftag);
- if (status) {
- goto invalid;
- }
-
- buf += len;
- mbuf += mlen;
- }
-
- apptag = le16_to_cpu(copy->apptag);
- appmask = le16_to_cpu(copy->appmask);
- reftag = le32_to_cpu(copy->reftag);
-
- if (prinfow & NVME_RW_PRINFO_PRACT) {
- size_t len = nvme_l2b(ns, ctx->nlb);
- size_t mlen = nvme_m2b(ns, ctx->nlb);
-
- status = nvme_check_prinfo(ns, prinfow, sdlba, reftag);
- if (status) {
- goto invalid;
- }
-
- nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce,
- mlen, apptag, reftag);
- } else {
- status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen,
- prinfow, sdlba, apptag, appmask, reftag);
- if (status) {
- goto invalid;
- }
- }
- }
-
- status = nvme_check_bounds(ns, sdlba, ctx->nlb);
- if (status) {
- goto invalid;
- }
-
- if (ns->params.zoned) {
- NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
-
- status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb);
- if (status) {
- goto invalid;
- }
-
- status = nvme_zrm_auto(ns, zone);
- if (status) {
- goto invalid;
- }
-
- zone->w_ptr += ctx->nlb;
- }
-
- qemu_iovec_init(&req->sg.iov, 1);
- qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
-
- block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_WRITE);
-
- req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
- &req->sg.iov, 0, nvme_copy_cb, req);
-
- return;
-
-invalid:
- req->status = status;
-
- g_free(ctx->bounce);
- g_free(ctx);
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-}
-
-static void nvme_aio_copy_in_cb(void *opaque, int ret)
-{
- struct nvme_copy_in_ctx *in_ctx = opaque;
- NvmeRequest *req = in_ctx->req;
- NvmeNamespace *ns = req->ns;
- struct nvme_copy_ctx *ctx = req->opaque;
-
- qemu_iovec_destroy(&in_ctx->iov);
- g_free(in_ctx);
-
- trace_pci_nvme_aio_copy_in_cb(nvme_cid(req));
-
- if (ret) {
- nvme_aio_err(req, ret);
- }
-
- ctx->copies--;
-
- if (ctx->copies) {
- return;
- }
-
- if (req->status) {
- block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
-
- g_free(ctx->bounce);
- g_free(ctx->mbounce);
- g_free(ctx);
-
- nvme_enqueue_req_completion(nvme_cq(req), req);
-
- return;
- }
-
- nvme_copy_in_complete(req);
-}
-
struct nvme_compare_ctx {
struct {
QEMUIOVector iov;
@@ -2366,7 +2032,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
NvmeNamespace *ns = req->ns;
NvmeCtrl *n = nvme_ctrl(req);
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
@@ -2402,8 +2068,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
int16_t pil = 0;
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
- ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
- slba, apptag, appmask, reftag);
+ ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
+ slba, apptag, appmask, &reftag);
if (status) {
req->status = status;
goto out;
@@ -2508,75 +2174,182 @@ out:
nvme_enqueue_req_completion(nvme_cq(req), req);
}
-static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
+typedef struct NvmeDSMAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
+ NvmeDsmRange *range;
+ unsigned int nr;
+ unsigned int idx;
+} NvmeDSMAIOCB;
+
+static void nvme_dsm_cancel(BlockAIOCB *aiocb)
{
+ NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common);
+
+ /* break nvme_dsm_cb loop */
+ iocb->idx = iocb->nr;
+ iocb->ret = -ECANCELED;
+
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
+ iocb->aiocb = NULL;
+ } else {
+ /*
+ * We only reach this if nvme_dsm_cancel() has already been called or
+ * the command ran to completion and nvme_dsm_bh is scheduled to run.
+ */
+ assert(iocb->idx == iocb->nr);
+ }
+}
+
+static const AIOCBInfo nvme_dsm_aiocb_info = {
+ .aiocb_size = sizeof(NvmeDSMAIOCB),
+ .cancel_async = nvme_dsm_cancel,
+};
+
+static void nvme_dsm_bh(void *opaque)
+{
+ NvmeDSMAIOCB *iocb = opaque;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
+}
+
+static void nvme_dsm_cb(void *opaque, int ret);
+
+static void nvme_dsm_md_cb(void *opaque, int ret)
+{
+ NvmeDSMAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
+ NvmeDsmRange *range;
+ uint64_t slba;
+ uint32_t nlb;
- uint32_t attr = le32_to_cpu(dsm->attributes);
- uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ }
- uint16_t status = NVME_SUCCESS;
+ if (!ns->lbaf.ms) {
+ nvme_dsm_cb(iocb, 0);
+ return;
+ }
- trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr);
+ range = &iocb->range[iocb->idx - 1];
+ slba = le64_to_cpu(range->slba);
+ nlb = le32_to_cpu(range->nlb);
- if (attr & NVME_DSMGMT_AD) {
- int64_t offset;
- size_t len;
- NvmeDsmRange range[nr];
- uintptr_t *discards = (uintptr_t *)&req->opaque;
+ /*
+ * Check that all blocks were discarded (zeroed); otherwise we do not zero
+ * the metadata.
+ */
- status = nvme_h2c(n, (uint8_t *)range, sizeof(range), req);
- if (status) {
- return status;
+ ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO);
+ if (ret) {
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
}
- /*
- * AIO callbacks may be called immediately, so initialize discards to 1
- * to make sure the the callback does not complete the request before
- * all discards have been issued.
- */
- *discards = 1;
+ nvme_dsm_cb(iocb, 0);
+ }
- for (int i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(range[i].slba);
- uint32_t nlb = le32_to_cpu(range[i].nlb);
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba),
+ nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP,
+ nvme_dsm_cb, iocb);
+ return;
- if (nvme_check_bounds(ns, slba, nlb)) {
- continue;
- }
+done:
+ iocb->aiocb = NULL;
+ qemu_bh_schedule(iocb->bh);
+}
- trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba,
- nlb);
+static void nvme_dsm_cb(void *opaque, int ret)
+{
+ NvmeDSMAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeCtrl *n = nvme_ctrl(req);
+ NvmeNamespace *ns = req->ns;
+ NvmeDsmRange *range;
+ uint64_t slba;
+ uint32_t nlb;
- if (nlb > n->dmrsl) {
- trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
- }
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ }
- offset = nvme_l2b(ns, slba);
- len = nvme_l2b(ns, nlb);
+next:
+ if (iocb->idx == iocb->nr) {
+ goto done;
+ }
- while (len) {
- size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len);
+ range = &iocb->range[iocb->idx++];
+ slba = le64_to_cpu(range->slba);
+ nlb = le32_to_cpu(range->nlb);
- (*discards)++;
+ trace_pci_nvme_dsm_deallocate(slba, nlb);
- blk_aio_pdiscard(ns->blkconf.blk, offset, bytes,
- nvme_aio_discard_cb, req);
+ if (nlb > n->dmrsl) {
+ trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
+ goto next;
+ }
- offset += bytes;
- len -= bytes;
- }
- }
+ if (nvme_check_bounds(ns, slba, nlb)) {
+ trace_pci_nvme_err_invalid_lba_range(slba, nlb,
+ ns->id_ns.nsze);
+ goto next;
+ }
- /* account for the 1-initialization */
- (*discards)--;
+ iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba),
+ nvme_l2b(ns, nlb),
+ nvme_dsm_md_cb, iocb);
+ return;
- if (*discards) {
- status = NVME_NO_COMPLETE;
- } else {
- status = req->status;
+done:
+ iocb->aiocb = NULL;
+ qemu_bh_schedule(iocb->bh);
+}
+
+static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
+ uint32_t attr = le32_to_cpu(dsm->attributes);
+ uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
+ uint16_t status = NVME_SUCCESS;
+
+ trace_pci_nvme_dsm(nr, attr);
+
+ if (attr & NVME_DSMGMT_AD) {
+ NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk,
+ nvme_misc_cb, req);
+
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb);
+ iocb->ret = 0;
+ iocb->range = g_new(NvmeDsmRange, nr);
+ iocb->nr = nr;
+ iocb->idx = 0;
+
+ status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr,
+ req);
+ if (status) {
+ return status;
}
+
+ req->aiocb = &iocb->common;
+ nvme_dsm_cb(iocb, 0);
+
+ return NVME_NO_COMPLETE;
}
return status;
@@ -2591,7 +2364,7 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
int64_t offset = nvme_l2b(ns, slba);
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint32_t reftag = le32_to_cpu(rw->reftag);
NvmeBounceContext *ctx = NULL;
uint16_t status;
@@ -2599,12 +2372,12 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
- status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ status = nvme_check_prinfo(ns, prinfo, slba, reftag);
if (status) {
return status;
}
- if (ctrl & NVME_RW_PRINFO_PRACT) {
+ if (prinfo & NVME_PRINFO_PRACT) {
return NVME_INVALID_PROT_INFO | NVME_DNR;
}
}
@@ -2641,158 +2414,433 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
-static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
+typedef struct NvmeCopyAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
+ NvmeCopySourceRange *ranges;
+ int nr;
+ int idx;
+
+ uint8_t *bounce;
+ QEMUIOVector iov;
+ struct {
+ BlockAcctCookie read;
+ BlockAcctCookie write;
+ } acct;
+
+ uint32_t reftag;
+ uint64_t slba;
+
+ NvmeZone *zone;
+} NvmeCopyAIOCB;
+
+static void nvme_copy_cancel(BlockAIOCB *aiocb)
+{
+ NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common);
+
+ iocb->ret = -ECANCELED;
+
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
+ iocb->aiocb = NULL;
+ }
+}
+
+static const AIOCBInfo nvme_copy_aiocb_info = {
+ .aiocb_size = sizeof(NvmeCopyAIOCB),
+ .cancel_async = nvme_copy_cancel,
+};
+
+static void nvme_copy_bh(void *opaque)
{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk);
- uint16_t nr = copy->nr + 1;
- uint8_t format = copy->control[0] & 0xf;
+ if (iocb->idx != iocb->nr) {
+ req->cqe.result = cpu_to_le32(iocb->idx);
+ }
- /*
- * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the
- * NVME_RW_PRINFO constants.
- */
- uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10;
- uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10;
+ qemu_iovec_destroy(&iocb->iov);
+ g_free(iocb->bounce);
- uint32_t nlb = 0;
- uint8_t *bounce = NULL, *bouncep = NULL;
- uint8_t *mbounce = NULL, *mbouncep = NULL;
- struct nvme_copy_ctx *ctx;
- uint16_t status;
- int i;
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
- trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+ if (iocb->ret < 0) {
+ block_acct_failed(stats, &iocb->acct.read);
+ block_acct_failed(stats, &iocb->acct.write);
+ } else {
+ block_acct_done(stats, &iocb->acct.read);
+ block_acct_done(stats, &iocb->acct.write);
+ }
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
- ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) {
- return NVME_INVALID_FIELD | NVME_DNR;
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+ qemu_aio_unref(iocb);
+}
+
+static void nvme_copy_cb(void *opaque, int ret);
+
+static void nvme_copy_out_completed_cb(void *opaque, int ret)
+{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCopySourceRange *range = &iocb->ranges[iocb->idx];
+ uint32_t nlb = le32_to_cpu(range->nlb) + 1;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
+ goto out;
}
- if (!(n->id_ctrl.ocfs & (1 << format))) {
- trace_pci_nvme_err_copy_invalid_format(format);
- return NVME_INVALID_FIELD | NVME_DNR;
+ if (ns->params.zoned) {
+ nvme_advance_zone_wp(ns, iocb->zone, nlb);
}
- if (nr > ns->id_ns.msrc + 1) {
- return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ iocb->idx++;
+ iocb->slba += nlb;
+out:
+ nvme_copy_cb(iocb, iocb->ret);
+}
+
+static void nvme_copy_out_cb(void *opaque, int ret)
+{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCopySourceRange *range;
+ uint32_t nlb;
+ size_t mlen;
+ uint8_t *mbounce;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
+ goto out;
}
- ctx = g_new(struct nvme_copy_ctx, 1);
- ctx->ranges = g_new(NvmeCopySourceRange, nr);
+ if (!ns->lbaf.ms) {
+ nvme_copy_out_completed_cb(iocb, 0);
+ return;
+ }
- status = nvme_h2c(n, (uint8_t *)ctx->ranges,
- nr * sizeof(NvmeCopySourceRange), req);
- if (status) {
+ range = &iocb->ranges[iocb->idx];
+ nlb = le32_to_cpu(range->nlb) + 1;
+
+ mlen = nvme_m2b(ns, nlb);
+ mbounce = iocb->bounce + nvme_l2b(ns, nlb);
+
+ qemu_iovec_reset(&iocb->iov);
+ qemu_iovec_add(&iocb->iov, mbounce, mlen);
+
+ iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba),
+ &iocb->iov, 0, nvme_copy_out_completed_cb,
+ iocb);
+
+ return;
+
+out:
+ nvme_copy_cb(iocb, ret);
+}
+
+static void nvme_copy_in_completed_cb(void *opaque, int ret)
+{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCopySourceRange *range;
+ uint32_t nlb;
+ size_t len;
+ uint16_t status;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
goto out;
}
- for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
- uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
+ range = &iocb->ranges[iocb->idx];
+ nlb = le32_to_cpu(range->nlb) + 1;
+ len = nvme_l2b(ns, nlb);
- if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) {
- status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
- goto out;
- }
+ trace_pci_nvme_copy_out(iocb->slba, nlb);
- status = nvme_check_bounds(ns, slba, _nlb);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+
+ uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
+
+ uint16_t apptag = le16_to_cpu(range->apptag);
+ uint16_t appmask = le16_to_cpu(range->appmask);
+ uint32_t reftag = le32_to_cpu(range->reftag);
+
+ uint64_t slba = le64_to_cpu(range->slba);
+ size_t mlen = nvme_m2b(ns, nlb);
+ uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);
+
+ status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor,
+ slba, apptag, appmask, &reftag);
if (status) {
- goto out;
+ goto invalid;
}
- if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
- status = nvme_check_dulbe(ns, slba, _nlb);
+ apptag = le16_to_cpu(copy->apptag);
+ appmask = le16_to_cpu(copy->appmask);
+
+ if (prinfow & NVME_PRINFO_PRACT) {
+ status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag);
if (status) {
- goto out;
+ goto invalid;
}
- }
- if (ns->params.zoned) {
- status = nvme_check_zone_read(ns, slba, _nlb);
+ nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen,
+ apptag, &iocb->reftag);
+ } else {
+ status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen,
+ prinfow, iocb->slba, apptag, appmask,
+ &iocb->reftag);
if (status) {
- goto out;
+ goto invalid;
}
}
+ }
- nlb += _nlb;
+ status = nvme_check_bounds(ns, iocb->slba, nlb);
+ if (status) {
+ goto invalid;
}
- if (nlb > le32_to_cpu(ns->id_ns.mcl)) {
- status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
- goto out;
+ if (ns->params.zoned) {
+ status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
+ if (status) {
+ goto invalid;
+ }
+
+ iocb->zone->w_ptr += nlb;
}
- bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
- if (ns->lbaf.ms) {
- mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb));
+ qemu_iovec_reset(&iocb->iov);
+ qemu_iovec_add(&iocb->iov, iocb->bounce, len);
+
+ iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba),
+ &iocb->iov, 0, nvme_copy_out_cb, iocb);
+
+ return;
+
+invalid:
+ req->status = status;
+ iocb->aiocb = NULL;
+ if (iocb->bh) {
+ qemu_bh_schedule(iocb->bh);
}
- block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_READ);
+ return;
- ctx->bounce = bounce;
- ctx->mbounce = mbounce;
- ctx->nlb = nlb;
- ctx->copies = 1;
+out:
+ nvme_copy_cb(iocb, ret);
+}
- req->opaque = ctx;
+static void nvme_copy_in_cb(void *opaque, int ret)
+{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCopySourceRange *range;
+ uint64_t slba;
+ uint32_t nlb;
- for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
- uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
+ goto out;
+ }
- size_t len = nvme_l2b(ns, nlb);
- int64_t offset = nvme_l2b(ns, slba);
+ if (!ns->lbaf.ms) {
+ nvme_copy_in_completed_cb(iocb, 0);
+ return;
+ }
- trace_pci_nvme_copy_source_range(slba, nlb);
+ range = &iocb->ranges[iocb->idx];
+ slba = le64_to_cpu(range->slba);
+ nlb = le32_to_cpu(range->nlb) + 1;
- struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1);
- in_ctx->req = req;
+ qemu_iovec_reset(&iocb->iov);
+ qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb),
+ nvme_m2b(ns, nlb));
- qemu_iovec_init(&in_ctx->iov, 1);
- qemu_iovec_add(&in_ctx->iov, bouncep, len);
+ iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba),
+ &iocb->iov, 0, nvme_copy_in_completed_cb,
+ iocb);
+ return;
- ctx->copies++;
+out:
+ nvme_copy_cb(iocb, iocb->ret);
+}
- blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0,
- nvme_aio_copy_in_cb, in_ctx);
+static void nvme_copy_cb(void *opaque, int ret)
+{
+ NvmeCopyAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCopySourceRange *range;
+ uint64_t slba;
+ uint32_t nlb;
+ size_t len;
+ uint16_t status;
- bouncep += len;
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ } else if (iocb->ret < 0) {
+ goto done;
+ }
- if (ns->lbaf.ms) {
- len = nvme_m2b(ns, nlb);
- offset = nvme_moff(ns, slba);
+ if (iocb->idx == iocb->nr) {
+ goto done;
+ }
- in_ctx = g_new(struct nvme_copy_in_ctx, 1);
- in_ctx->req = req;
+ range = &iocb->ranges[iocb->idx];
+ slba = le64_to_cpu(range->slba);
+ nlb = le32_to_cpu(range->nlb) + 1;
+ len = nvme_l2b(ns, nlb);
- qemu_iovec_init(&in_ctx->iov, 1);
- qemu_iovec_add(&in_ctx->iov, mbouncep, len);
+ trace_pci_nvme_copy_source_range(slba, nlb);
- ctx->copies++;
+ if (nlb > le16_to_cpu(ns->id_ns.mssrl)) {
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto invalid;
+ }
- blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0,
- nvme_aio_copy_in_cb, in_ctx);
+ status = nvme_check_bounds(ns, slba, nlb);
+ if (status) {
+ goto invalid;
+ }
- mbouncep += len;
+ if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+ status = nvme_check_dulbe(ns, slba, nlb);
+ if (status) {
+ goto invalid;
}
}
- /* account for the 1-initialization */
- ctx->copies--;
+ if (ns->params.zoned) {
+ status = nvme_check_zone_read(ns, slba, nlb);
+ if (status) {
+ goto invalid;
+ }
+ }
+
+ qemu_iovec_reset(&iocb->iov);
+ qemu_iovec_add(&iocb->iov, iocb->bounce, len);
- if (!ctx->copies) {
- nvme_copy_in_complete(req);
+ iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
+ &iocb->iov, 0, nvme_copy_in_cb, iocb);
+ return;
+
+invalid:
+ req->status = status;
+done:
+ iocb->aiocb = NULL;
+ if (iocb->bh) {
+ qemu_bh_schedule(iocb->bh);
}
+}
- return NVME_NO_COMPLETE;
-out:
- g_free(ctx->ranges);
- g_free(ctx);
+static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk,
+ nvme_misc_cb, req);
+ uint16_t nr = copy->nr + 1;
+ uint8_t format = copy->control[0] & 0xf;
+ uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
+ uint16_t status;
+
+ trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+
+ iocb->ranges = NULL;
+ iocb->zone = NULL;
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto invalid;
+ }
+
+ if (!(n->id_ctrl.ocfs & (1 << format))) {
+ trace_pci_nvme_err_copy_invalid_format(format);
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto invalid;
+ }
+
+ if (nr > ns->id_ns.msrc + 1) {
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto invalid;
+ }
+
+ iocb->ranges = g_new(NvmeCopySourceRange, nr);
+
+ status = nvme_h2c(n, (uint8_t *)iocb->ranges,
+ sizeof(NvmeCopySourceRange) * nr, req);
+ if (status) {
+ goto invalid;
+ }
+
+ iocb->slba = le64_to_cpu(copy->sdlba);
+
+ if (ns->params.zoned) {
+ iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba);
+ if (!iocb->zone) {
+ status = NVME_LBA_RANGE | NVME_DNR;
+ goto invalid;
+ }
+
+ status = nvme_zrm_auto(n, ns, iocb->zone);
+ if (status) {
+ goto invalid;
+ }
+ }
+
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_copy_bh, iocb);
+ iocb->ret = 0;
+ iocb->nr = nr;
+ iocb->idx = 0;
+ iocb->reftag = le32_to_cpu(copy->reftag);
+ iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
+ ns->lbasz + ns->lbaf.ms);
+
+ qemu_iovec_init(&iocb->iov, 1);
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0,
+ BLOCK_ACCT_READ);
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0,
+ BLOCK_ACCT_WRITE);
+
+ req->aiocb = &iocb->common;
+ nvme_copy_cb(iocb, 0);
+
+ return NVME_NO_COMPLETE;
+
+invalid:
+ g_free(iocb->ranges);
+ qemu_aio_unref(iocb);
return status;
}
@@ -2803,7 +2851,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
BlockBackend *blk = ns->blkconf.blk;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
size_t data_len = nvme_l2b(ns, nlb);
size_t len = data_len;
int64_t offset = nvme_l2b(ns, slba);
@@ -2812,7 +2860,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) {
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) {
return NVME_INVALID_PROT_INFO | NVME_DNR;
}
@@ -2858,57 +2906,139 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
-static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
-{
- uint32_t nsid = le32_to_cpu(req->cmd.nsid);
- uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
- uint16_t status;
- struct nvme_aio_flush_ctx *ctx;
+typedef struct NvmeFlushAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
NvmeNamespace *ns;
+ uint32_t nsid;
+ bool broadcast;
+} NvmeFlushAIOCB;
- trace_pci_nvme_flush(nvme_cid(req), nsid);
+static void nvme_flush_cancel(BlockAIOCB *acb)
+{
+ NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common);
- if (nsid != NVME_NSID_BROADCAST) {
- req->ns = nvme_ns(n, nsid);
- if (unlikely(!req->ns)) {
- return NVME_INVALID_FIELD | NVME_DNR;
- }
+ iocb->ret = -ECANCELED;
- block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req);
- return NVME_NO_COMPLETE;
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
}
+}
- /* 1-initialize; see comment in nvme_dsm */
- *num_flushes = 1;
+static const AIOCBInfo nvme_flush_aiocb_info = {
+ .aiocb_size = sizeof(NvmeFlushAIOCB),
+ .cancel_async = nvme_flush_cancel,
+ .get_aio_context = nvme_get_aio_context,
+};
- for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
- }
+static void nvme_flush_ns_cb(void *opaque, int ret)
+{
+ NvmeFlushAIOCB *iocb = opaque;
+ NvmeNamespace *ns = iocb->ns;
- ctx = g_new(struct nvme_aio_flush_ctx, 1);
- ctx->req = req;
- ctx->ns = ns;
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto out;
+ } else if (iocb->ret < 0) {
+ goto out;
+ }
- (*num_flushes)++;
+ if (ns) {
+ trace_pci_nvme_flush_ns(iocb->nsid);
- block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0,
- BLOCK_ACCT_FLUSH);
- blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx);
+ iocb->ns = NULL;
+ iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb);
+ return;
}
- /* account for the 1-initialization */
- (*num_flushes)--;
+out:
+ iocb->aiocb = NULL;
+ qemu_bh_schedule(iocb->bh);
+}
- if (*num_flushes) {
- status = NVME_NO_COMPLETE;
- } else {
- status = req->status;
+static void nvme_flush_bh(void *opaque)
+{
+ NvmeFlushAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeCtrl *n = nvme_ctrl(req);
+ int i;
+
+ if (iocb->ret < 0) {
+ goto done;
}
+ if (iocb->broadcast) {
+ for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
+ iocb->ns = nvme_ns(n, i);
+ if (iocb->ns) {
+ iocb->nsid = i;
+ break;
+ }
+ }
+ }
+
+ if (!iocb->ns) {
+ goto done;
+ }
+
+ nvme_flush_ns_cb(iocb, 0);
+ return;
+
+done:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_aio_unref(iocb);
+
+ return;
+}
+
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeFlushAIOCB *iocb;
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint16_t status;
+
+ iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req);
+
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_flush_bh, iocb);
+ iocb->ret = 0;
+ iocb->ns = NULL;
+ iocb->nsid = 0;
+ iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
+
+ if (!iocb->broadcast) {
+ if (!nvme_nsid_valid(n, nsid)) {
+ status = NVME_INVALID_NSID | NVME_DNR;
+ goto out;
+ }
+
+ iocb->ns = nvme_ns(n, nsid);
+ if (!iocb->ns) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto out;
+ }
+
+ iocb->nsid = nsid;
+ }
+
+ req->aiocb = &iocb->common;
+ qemu_bh_schedule(iocb->bh);
+
+ return NVME_NO_COMPLETE;
+
+out:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
+
return status;
}
@@ -2918,7 +3048,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
NvmeNamespace *ns = req->ns;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint64_t data_size = nvme_l2b(ns, nlb);
uint64_t mapped_size = data_size;
uint64_t data_offset;
@@ -2929,7 +3059,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
- bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+ bool pract = prinfo & NVME_PRINFO_PRACT;
if (pract && ns->lbaf.ms == 8) {
mapped_size = data_size;
@@ -2993,6 +3123,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(ctrl);
uint64_t data_size = nvme_l2b(ns, nlb);
uint64_t mapped_size = data_size;
uint64_t data_offset;
@@ -3005,7 +3136,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
- bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+ bool pract = prinfo & NVME_PRINFO_PRACT;
if (pract && ns->lbaf.ms == 8) {
mapped_size -= nvme_m2b(ns, nlb);
@@ -3030,6 +3161,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
if (ns->params.zoned) {
zone = nvme_get_zone_by_slba(ns, slba);
+ assert(zone);
if (append) {
bool piremap = !!(ctrl & NVME_RW_PIREMAP);
@@ -3080,7 +3212,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
goto invalid;
}
- status = nvme_zrm_auto(ns, zone);
+ status = nvme_zrm_auto(n, ns, zone);
if (status) {
goto invalid;
}
@@ -3169,7 +3301,7 @@ enum NvmeZoneProcessingMask {
static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
- return nvme_zrm_open(ns, zone);
+ return nvme_zrm_open(nvme_ctrl(req), ns, zone);
}
static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
@@ -3184,41 +3316,6 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
return nvme_zrm_finish(ns, zone);
}
-static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
- NvmeZoneState state, NvmeRequest *req)
-{
- uintptr_t *resets = (uintptr_t *)&req->opaque;
- struct nvme_zone_reset_ctx *ctx;
-
- switch (state) {
- case NVME_ZONE_STATE_EMPTY:
- return NVME_SUCCESS;
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- case NVME_ZONE_STATE_CLOSED:
- case NVME_ZONE_STATE_FULL:
- break;
- default:
- return NVME_ZONE_INVAL_TRANSITION;
- }
-
- /*
- * The zone reset aio callback needs to know the zone that is being reset
- * in order to transition the zone on completion.
- */
- ctx = g_new(struct nvme_zone_reset_ctx, 1);
- ctx->req = req;
- ctx->zone = zone;
-
- (*resets)++;
-
- blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba),
- nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
- nvme_aio_zone_reset_cb, ctx);
-
- return NVME_NO_COMPLETE;
-}
-
static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
@@ -3347,12 +3444,144 @@ out:
return status;
}
+typedef struct NvmeZoneResetAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ NvmeRequest *req;
+ QEMUBH *bh;
+ int ret;
+
+ bool all;
+ int idx;
+ NvmeZone *zone;
+} NvmeZoneResetAIOCB;
+
+static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
+{
+ NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+
+ iocb->idx = ns->num_zones;
+
+ iocb->ret = -ECANCELED;
+
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
+ iocb->aiocb = NULL;
+ }
+}
+
+static const AIOCBInfo nvme_zone_reset_aiocb_info = {
+ .aiocb_size = sizeof(NvmeZoneResetAIOCB),
+ .cancel_async = nvme_zone_reset_cancel,
+};
+
+static void nvme_zone_reset_bh(void *opaque)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
+}
+
+static void nvme_zone_reset_cb(void *opaque, int ret);
+
+static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+ int64_t moff;
+ int count;
+
+ if (ret < 0) {
+ nvme_zone_reset_cb(iocb, ret);
+ return;
+ }
+
+ if (!ns->lbaf.ms) {
+ nvme_zone_reset_cb(iocb, 0);
+ return;
+ }
+
+ moff = nvme_moff(ns, iocb->zone->d.zslba);
+ count = nvme_m2b(ns, ns->zone_size);
+
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
+ BDRV_REQ_MAY_UNMAP,
+ nvme_zone_reset_cb, iocb);
+ return;
+}
+
+static void nvme_zone_reset_cb(void *opaque, int ret)
+{
+ NvmeZoneResetAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = req->ns;
+
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ }
+
+ if (iocb->zone) {
+ nvme_zrm_reset(ns, iocb->zone);
+
+ if (!iocb->all) {
+ goto done;
+ }
+ }
+
+ while (iocb->idx < ns->num_zones) {
+ NvmeZone *zone = &ns->zone_array[iocb->idx++];
+
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EMPTY:
+ if (!iocb->all) {
+ goto done;
+ }
+
+ continue;
+
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_FULL:
+ iocb->zone = zone;
+ break;
+
+ default:
+ continue;
+ }
+
+ trace_pci_nvme_zns_zone_reset(zone->d.zslba);
+
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
+ nvme_l2b(ns, zone->d.zslba),
+ nvme_l2b(ns, ns->zone_size),
+ BDRV_REQ_MAY_UNMAP,
+ nvme_zone_reset_epilogue_cb,
+ iocb);
+ return;
+ }
+
+done:
+ iocb->aiocb = NULL;
+ if (iocb->bh) {
+ qemu_bh_schedule(iocb->bh);
+ }
+}
+
static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
NvmeZone *zone;
- uintptr_t *resets;
+ NvmeZoneResetAIOCB *iocb;
uint8_t *zd_ext;
uint32_t dw13 = le32_to_cpu(cmd->cdw13);
uint64_t slba = 0;
@@ -3363,7 +3592,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;
action = dw13 & 0xff;
- all = dw13 & 0x100;
+ all = !!(dw13 & 0x100);
req->status = NVME_SUCCESS;
@@ -3407,21 +3636,22 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_ZONE_ACTION_RESET:
- resets = (uintptr_t *)&req->opaque;
-
- if (all) {
- proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES |
- NVME_PROC_FULL_ZONES;
- }
trace_pci_nvme_reset_zone(slba, zone_idx, all);
- *resets = 1;
+ iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
+ nvme_misc_cb, req);
- status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req);
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb);
+ iocb->ret = 0;
+ iocb->all = all;
+ iocb->idx = zone_idx;
+ iocb->zone = NULL;
- (*resets)--;
+ req->aiocb = &iocb->common;
+ nvme_zone_reset_cb(iocb, 0);
- return *resets ? NVME_NO_COMPLETE : req->status;
+ return NVME_NO_COMPLETE;
case NVME_ZONE_ACTION_OFFLINE:
if (all) {
@@ -3695,7 +3925,6 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
NvmeSQueue *sq;
NvmeCQueue *cq;
uint16_t qid = le16_to_cpu(c->qid);
- uint32_t nsid;
if (unlikely(!qid || nvme_check_sqid(n, qid))) {
trace_pci_nvme_err_invalid_del_sq(qid);
@@ -3707,22 +3936,8 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
sq = n->sq[qid];
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
r = QTAILQ_FIRST(&sq->out_req_list);
- if (r->aiocb) {
- blk_aio_cancel(r->aiocb);
- }
- }
-
- /*
- * Drain all namespaces if there are still outstanding requests that we
- * could not cancel explicitly.
- */
- if (!QTAILQ_EMPTY(&sq->out_req_list)) {
- for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
- NvmeNamespace *ns = nvme_ns(n, nsid);
- if (ns) {
- nvme_ns_drain(ns);
- }
- }
+ assert(r->aiocb);
+ blk_aio_cancel(r->aiocb);
}
assert(QTAILQ_EMPTY(&sq->out_req_list));
@@ -4089,6 +4304,11 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_err_invalid_del_cq_notempty(qid);
return NVME_INVALID_QUEUE_DEL;
}
+
+ if (cq->irq_enabled && cq->tail != cq->head) {
+ n->cq_pending--;
+ }
+
nvme_irq_deassert(n, cq);
trace_pci_nvme_del_cq(qid);
nvme_free_cq(cq, n);
@@ -4178,16 +4398,6 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
return nvme_c2h(n, id, sizeof(id), req);
}
-static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns)
-{
- switch (ns->csi) {
- case NVME_CSI_NVM:
- case NVME_CSI_ZONED:
- return true;
- }
- return false;
-}
-
static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_identify_ctrl();
@@ -4244,16 +4454,18 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
}
}
- if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
+ if (active || ns->csi == NVME_CSI_NVM) {
return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
}
return NVME_INVALID_CMD_SET | NVME_DNR;
}
-static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
+ bool attached)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ uint32_t nsid = le32_to_cpu(c->nsid);
uint16_t min_id = le16_to_cpu(c->ctrlid);
uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
uint16_t *ids = &list[1];
@@ -4261,15 +4473,21 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
NvmeCtrl *ctrl;
int cntlid, nr_ids = 0;
- trace_pci_nvme_identify_ns_attached_list(min_id);
+ trace_pci_nvme_identify_ctrl_list(c->cns, min_id);
- if (c->nsid == NVME_NSID_BROADCAST) {
+ if (!n->subsys) {
return NVME_INVALID_FIELD | NVME_DNR;
}
- ns = nvme_subsys_ns(n->subsys, c->nsid);
- if (!ns) {
- return NVME_INVALID_FIELD | NVME_DNR;
+ if (attached) {
+ if (nsid == NVME_NSID_BROADCAST) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ns = nvme_subsys_ns(n->subsys, nsid);
+ if (!ns) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
}
for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
@@ -4278,7 +4496,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
continue;
}
- if (!nvme_ns(ctrl, c->nsid)) {
+ if (attached && !nvme_ns(ctrl, nsid)) {
continue;
}
@@ -4291,7 +4509,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
}
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
- bool active)
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -4315,7 +4533,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
}
}
- if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
+ if (c->csi == NVME_CSI_NVM) {
return nvme_rpt_empty_id_struct(n, req);
} else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
@@ -4326,7 +4544,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
}
static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
- bool active)
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -4373,7 +4591,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
}
static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
- bool active)
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -4426,19 +4644,19 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
uint32_t nsid = le32_to_cpu(c->nsid);
uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
-
- struct data {
- struct {
- NvmeIdNsDescr hdr;
- uint8_t v[NVME_NIDL_UUID];
- } uuid;
- struct {
- NvmeIdNsDescr hdr;
- uint8_t v;
- } csi;
- };
-
- struct data *ns_descrs = (struct data *)list;
+ uint8_t *pos = list;
+ struct {
+ NvmeIdNsDescr hdr;
+ uint8_t v[NVME_NIDL_UUID];
+ } QEMU_PACKED uuid;
+ struct {
+ NvmeIdNsDescr hdr;
+ uint64_t v;
+ } QEMU_PACKED eui64;
+ struct {
+ NvmeIdNsDescr hdr;
+ uint8_t v;
+ } QEMU_PACKED csi;
trace_pci_nvme_identify_ns_descr_list(nsid);
@@ -4452,17 +4670,29 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
}
/*
- * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data
- * structure, a Namespace UUID (nidt = 3h) must be reported in the
- * Namespace Identification Descriptor. Add the namespace UUID here.
+ * If the EUI-64 field is 0 and the NGUID field is 0, the namespace must
+ * provide a valid Namespace UUID in the Namespace Identification Descriptor
+ * data structure. QEMU does not yet support setting NGUID.
*/
- ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
- ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID;
- memcpy(&ns_descrs->uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
-
- ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI;
- ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
- ns_descrs->csi.v = ns->csi;
+ uuid.hdr.nidt = NVME_NIDT_UUID;
+ uuid.hdr.nidl = NVME_NIDL_UUID;
+ memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
+ memcpy(pos, &uuid, sizeof(uuid));
+ pos += sizeof(uuid);
+
+ if (ns->params.eui64) {
+ eui64.hdr.nidt = NVME_NIDT_EUI64;
+ eui64.hdr.nidl = NVME_NIDL_EUI64;
+ eui64.v = cpu_to_be64(ns->params.eui64);
+ memcpy(pos, &eui64, sizeof(eui64));
+ pos += sizeof(eui64);
+ }
+
+ csi.hdr.nidt = NVME_NIDT_CSI;
+ csi.hdr.nidl = NVME_NIDL_CSI;
+ csi.v = ns->csi;
+ memcpy(pos, &csi, sizeof(csi));
+ pos += sizeof(csi);
return nvme_c2h(n, list, sizeof(list), req);
}
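
The rewritten descriptor-list builder emits a sequence of
(nidt, nidl, value) entries packed back-to-back into the 4096-byte
identify buffer, appending the EUI-64 entry only when the parameter is
set; the zeroed tail of the buffer terminates the list. A self-contained
sketch of that layout (type and length values restated from the NVMe
spec; QEMU spells them NVME_NIDT_* and NVME_NIDL_*):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    enum { NIDT_EUI64 = 0x1, NIDT_UUID = 0x3, NIDT_CSI = 0x4 };
    enum { NIDL_EUI64 = 8, NIDL_UUID = 16, NIDL_CSI = 1 };

    /* Same shape as NvmeIdNsDescr: type, length, two reserved bytes. */
    struct __attribute__((packed)) descr_hdr {
        uint8_t nidt, nidl, rsvd[2];
    };

    static uint8_t *put_descr(uint8_t *pos, uint8_t nidt, uint8_t nidl,
                              const void *v)
    {
        struct descr_hdr hdr = { .nidt = nidt, .nidl = nidl };
        memcpy(pos, &hdr, sizeof(hdr));
        memcpy(pos + sizeof(hdr), v, nidl);
        return pos + sizeof(hdr) + nidl;
    }

    int main(void)
    {
        uint8_t list[4096] = { 0 };   /* zeroed tail terminates the list */
        uint8_t uuid[16] = { 0xde, 0xad };
        uint64_t eui64_be = 0;        /* stored big-endian on the wire */
        uint8_t csi = 0x00;           /* NVM command set */
        uint8_t *pos = list;

        pos = put_descr(pos, NIDT_UUID, NIDL_UUID, uuid);
        pos = put_descr(pos, NIDT_EUI64, NIDL_EUI64, &eui64_be);
        pos = put_descr(pos, NIDT_CSI, NIDL_CSI, &csi);
        printf("used %td of %zu bytes\n", pos - list, sizeof(list));
        return 0;
    }
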
@@ -4493,7 +4723,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
case NVME_ID_CNS_NS_PRESENT:
return nvme_identify_ns(n, req, false);
case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
- return nvme_identify_ns_attached_list(n, req);
+ return nvme_identify_ctrl_list(n, req, true);
+ case NVME_ID_CNS_CTRL_LIST:
+ return nvme_identify_ctrl_list(n, req, false);
case NVME_ID_CNS_CS_NS:
return nvme_identify_ns_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
@@ -5011,138 +5243,195 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_SUCCESS;
}
-static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf,
- uint8_t mset, uint8_t pi, uint8_t pil,
- NvmeRequest *req)
-{
- int64_t len, offset;
- struct nvme_aio_format_ctx *ctx;
- BlockBackend *blk = ns->blkconf.blk;
- uint16_t ms;
- uintptr_t *num_formats = (uintptr_t *)&req->opaque;
- int *count;
-
- if (ns->params.zoned) {
- return NVME_INVALID_FORMAT | NVME_DNR;
- }
+typedef struct NvmeFormatAIOCB {
+ BlockAIOCB common;
+ BlockAIOCB *aiocb;
+ QEMUBH *bh;
+ NvmeRequest *req;
+ int ret;
- trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil);
+ NvmeNamespace *ns;
+ uint32_t nsid;
+ bool broadcast;
+ int64_t offset;
+} NvmeFormatAIOCB;
- if (lbaf > ns->id_ns.nlbaf) {
- return NVME_INVALID_FORMAT | NVME_DNR;
- }
+static void nvme_format_bh(void *opaque);
- ms = ns->id_ns.lbaf[lbaf].ms;
+static void nvme_format_cancel(BlockAIOCB *aiocb)
+{
+ NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common);
- if (pi && (ms < sizeof(NvmeDifTuple))) {
- return NVME_INVALID_FORMAT | NVME_DNR;
+ if (iocb->aiocb) {
+ blk_aio_cancel_async(iocb->aiocb);
}
+}
- if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
- return NVME_INVALID_FIELD | NVME_DNR;
- }
+static const AIOCBInfo nvme_format_aiocb_info = {
+ .aiocb_size = sizeof(NvmeFormatAIOCB),
+ .cancel_async = nvme_format_cancel,
+ .get_aio_context = nvme_get_aio_context,
+};
+
+static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd)
+{
+ uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint8_t lbaf = dw10 & 0xf;
+ uint8_t pi = (dw10 >> 5) & 0x7;
+ uint8_t mset = (dw10 >> 4) & 0x1;
+ uint8_t pil = (dw10 >> 8) & 0x1;
- nvme_ns_drain(ns);
- nvme_ns_shutdown(ns);
- nvme_ns_cleanup(ns);
+ trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
ns->id_ns.dps = (pil << 3) | pi;
ns->id_ns.flbas = lbaf | (mset << 4);
nvme_ns_init_format(ns);
+}
- ns->status = NVME_FORMAT_IN_PROGRESS;
+static void nvme_format_ns_cb(void *opaque, int ret)
+{
+ NvmeFormatAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeNamespace *ns = iocb->ns;
+ int bytes;
- len = ns->size;
- offset = 0;
+ if (ret < 0) {
+ iocb->ret = ret;
+ goto done;
+ }
- count = g_new(int, 1);
- *count = 1;
+ assert(ns);
- (*num_formats)++;
+ if (iocb->offset < ns->size) {
+ bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset);
- while (len) {
- ctx = g_new(struct nvme_aio_format_ctx, 1);
- ctx->req = req;
- ctx->ns = ns;
- ctx->count = count;
+ iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset,
+ bytes, BDRV_REQ_MAY_UNMAP,
+ nvme_format_ns_cb, iocb);
- size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len);
+ iocb->offset += bytes;
+ return;
+ }
- (*count)++;
+ nvme_format_set(ns, &req->cmd);
+ ns->status = 0x0;
+ iocb->ns = NULL;
+ iocb->offset = 0;
- blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP,
- nvme_aio_format_cb, ctx);
+done:
+ iocb->aiocb = NULL;
+ qemu_bh_schedule(iocb->bh);
+}
- offset += bytes;
- len -= bytes;
+static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi)
+{
+ if (ns->params.zoned) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
+ }
+ if (lbaf > ns->id_ns.nlbaf) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
}
- if (--(*count)) {
- return NVME_NO_COMPLETE;
+ if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
}
- g_free(count);
- ns->status = 0x0;
- (*num_formats)--;
+ if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
return NVME_SUCCESS;
}
-static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
+static void nvme_format_bh(void *opaque)
{
- NvmeNamespace *ns;
+ NvmeFormatAIOCB *iocb = opaque;
+ NvmeRequest *req = iocb->req;
+ NvmeCtrl *n = nvme_ctrl(req);
uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
- uint32_t nsid = le32_to_cpu(req->cmd.nsid);
uint8_t lbaf = dw10 & 0xf;
- uint8_t mset = (dw10 >> 4) & 0x1;
uint8_t pi = (dw10 >> 5) & 0x7;
- uint8_t pil = (dw10 >> 8) & 0x1;
- uintptr_t *num_formats = (uintptr_t *)&req->opaque;
uint16_t status;
int i;
- trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil);
-
- /* 1-initialize; see the comment in nvme_dsm */
- *num_formats = 1;
+ if (iocb->ret < 0) {
+ goto done;
+ }
- if (nsid != NVME_NSID_BROADCAST) {
- if (!nvme_nsid_valid(n, nsid)) {
- return NVME_INVALID_NSID | NVME_DNR;
+ if (iocb->broadcast) {
+ for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
+ iocb->ns = nvme_ns(n, i);
+ if (iocb->ns) {
+ iocb->nsid = i;
+ break;
+ }
}
+ }
- ns = nvme_ns(n, nsid);
- if (!ns) {
- return NVME_INVALID_FIELD | NVME_DNR;
- }
+ if (!iocb->ns) {
+ goto done;
+ }
- status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req);
- if (status && status != NVME_NO_COMPLETE) {
- req->status = status;
+ status = nvme_format_check(iocb->ns, lbaf, pi);
+ if (status) {
+ req->status = status;
+ goto done;
+ }
+
+ iocb->ns->status = NVME_FORMAT_IN_PROGRESS;
+ nvme_format_ns_cb(iocb, 0);
+ return;
+
+done:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+
+ iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+ qemu_aio_unref(iocb);
+}
+
+static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeFormatAIOCB *iocb;
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint16_t status;
+
+ iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
+
+ iocb->req = req;
+ iocb->bh = qemu_bh_new(nvme_format_bh, iocb);
+ iocb->ret = 0;
+ iocb->ns = NULL;
+ iocb->nsid = 0;
+ iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
+ iocb->offset = 0;
+
+ if (!iocb->broadcast) {
+ if (!nvme_nsid_valid(n, nsid)) {
+ status = NVME_INVALID_NSID | NVME_DNR;
+ goto out;
}
- } else {
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
- }
- status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req);
- if (status && status != NVME_NO_COMPLETE) {
- req->status = status;
- break;
- }
+ iocb->ns = nvme_ns(n, nsid);
+ if (!iocb->ns) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto out;
}
}
- /* account for the 1-initialization */
- if (--(*num_formats)) {
- return NVME_NO_COMPLETE;
- }
+ req->aiocb = &iocb->common;
+ qemu_bh_schedule(iocb->bh);
- return req->status;
+ return NVME_NO_COMPLETE;
+
+out:
+ qemu_bh_delete(iocb->bh);
+ iocb->bh = NULL;
+ qemu_aio_unref(iocb);
+ return status;
}
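
For a broadcast Format (NSID 0xffffffff), the bottom half uses iocb->nsid
as a cursor: each invocation scans forward to the next allocated
namespace, formats it via nvme_format_ns_cb(), and reschedules, so a
single NvmeFormatAIOCB serially covers all namespaces while remaining
cancellable between them. The cursor scan, reduced to a standalone sketch
(hypothetical namespace table):

    #include <stdio.h>

    #define MAX_NS 4

    /* Hypothetical allocation table; NULL means "no namespace here". */
    static const char *ns_table[MAX_NS + 1] = {
        [1] = "ns1", [3] = "ns3",
    };

    /* Resume after the previously formatted nsid and stop at the next
     * allocated namespace, like the loop at the top of nvme_format_bh(). */
    static const char *next_ns(int *nsid)
    {
        for (int i = *nsid + 1; i <= MAX_NS; i++) {
            if (ns_table[i]) {
                *nsid = i;
                return ns_table[i];
            }
        }
        return NULL;  /* nothing left: the BH completes the command */
    }

    int main(void)
    {
        int cursor = 0;   /* iocb->nsid starts at 0 for broadcast */
        const char *ns;

        while ((ns = next_ns(&cursor))) {
            printf("format %s (nsid %d)\n", ns, cursor);
        }
        return 0;
    }
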
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
@@ -5583,6 +5872,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
"invalid write to PMRCAP register, ignored");
return;
case 0xe04: /* PMRCTL */
+ if (!NVME_CAP_PMRS(n->bar.cap)) {
+ return;
+ }
+
n->bar.pmrctl = data;
if (NVME_PMRCTL_EN(data)) {
memory_region_set_enabled(&n->pmr.dev->mr, true);
@@ -5758,6 +6051,10 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
}
if (cq->tail == cq->head) {
+ if (cq->irq_enabled) {
+ n->cq_pending--;
+ }
+
nvme_irq_deassert(n, cq);
}
} else {
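
The new cq_pending counter tracks completion queues that still have a
level-asserted pin interrupt outstanding. It is decremented in exactly two
places: here, when the host's doorbell write makes the head catch up with
the tail, and in the nvme_del_cq hunk further up, when a non-empty queue
is deleted. A compact model of that invariant (hypothetical CQ struct; the
real accounting lives in nvme_process_db() and nvme_del_cq()):

    #include <assert.h>
    #include <stdbool.h>

    typedef struct { int tail, head; bool irq_enabled; } CQ;

    static int cq_pending;

    static void doorbell(CQ *cq, int new_head)
    {
        cq->head = new_head;
        if (cq->tail == cq->head && cq->irq_enabled) {
            cq_pending--;    /* host consumed everything: deassert */
        }
    }

    static void del_cq(CQ *cq)
    {
        if (cq->irq_enabled && cq->tail != cq->head) {
            cq_pending--;    /* deleted while an IRQ was still pending */
        }
    }

    int main(void)
    {
        CQ a = { .tail = 2, .head = 0, .irq_enabled = true };
        cq_pending = 1;      /* one posted, unconsumed completion */
        doorbell(&a, 2);
        assert(cq_pending == 0);

        CQ b = { .tail = 1, .head = 0, .irq_enabled = true };
        cq_pending = 1;
        del_cq(&b);
        assert(cq_pending == 0);
        return 0;
    }
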
@@ -6259,6 +6556,8 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
+ DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
+ params.auto_transition_zones, true),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 88efcbe9bd..5dbd18b2a4 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -15,11 +15,11 @@
#include "nvme.h"
#include "trace.h"
-uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
uint32_t reftag)
{
if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
- (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
+ (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
return NVME_INVALID_PROT_INFO | NVME_DNR;
}
@@ -41,7 +41,7 @@ static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
uint8_t *mbuf, size_t mlen, uint16_t apptag,
- uint32_t reftag)
+ uint32_t *reftag)
{
uint8_t *end = buf + len;
int16_t pil = 0;
@@ -51,7 +51,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
}
trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
- apptag, reftag);
+ apptag, *reftag);
for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
@@ -63,17 +63,17 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
dif->guard = cpu_to_be16(crc);
dif->apptag = cpu_to_be16(apptag);
- dif->reftag = cpu_to_be32(reftag);
+ dif->reftag = cpu_to_be32(*reftag);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
- reftag++;
+ (*reftag)++;
}
}
}
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
uint8_t *buf, uint8_t *mbuf, size_t pil,
- uint16_t ctrl, uint16_t apptag,
+ uint8_t prinfo, uint16_t apptag,
uint16_t appmask, uint32_t reftag)
{
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
@@ -95,7 +95,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
return NVME_SUCCESS;
}
- if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
+ if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
if (pil) {
@@ -109,7 +109,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
}
}
- if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
+ if (prinfo & NVME_PRINFO_PRCHK_APP) {
trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
appmask);
@@ -118,7 +118,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
}
}
- if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
+ if (prinfo & NVME_PRINFO_PRCHK_REF) {
trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
if (be32_to_cpu(dif->reftag) != reftag) {
@@ -130,15 +130,15 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
}
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
- uint8_t *mbuf, size_t mlen, uint16_t ctrl,
+ uint8_t *mbuf, size_t mlen, uint8_t prinfo,
uint64_t slba, uint16_t apptag,
- uint16_t appmask, uint32_t reftag)
+ uint16_t appmask, uint32_t *reftag)
{
uint8_t *end = buf + len;
int16_t pil = 0;
uint16_t status;
- status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
if (status) {
return status;
}
@@ -147,19 +147,19 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
}
- trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil);
+ trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
- status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
- appmask, reftag);
+ status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
+ appmask, *reftag);
if (status) {
return status;
}
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
- reftag++;
+ (*reftag)++;
}
}
@@ -248,14 +248,14 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
NvmeCtrl *n = nvme_ctrl(req);
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
uint64_t slba = le64_to_cpu(rw->slba);
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
uint16_t status;
- trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
- appmask, reftag);
+ trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
+ reftag);
if (ret) {
goto out;
@@ -269,8 +269,8 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
}
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
- ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
- slba, apptag, appmask, reftag);
+ ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
+ slba, apptag, appmask, &reftag);
if (status) {
req->status = status;
goto out;
@@ -283,7 +283,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
goto out;
}
- if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) {
+ if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
goto out;
}
@@ -364,15 +364,15 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
size_t mlen = nvme_m2b(ns, nlb);
size_t mapped_len = len;
int64_t offset = nvme_l2b(ns, slba);
- uint16_t ctrl = le16_to_cpu(rw->control);
+ uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
- bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT);
+ bool pract = !!(prinfo & NVME_PRINFO_PRACT);
NvmeBounceContext *ctx;
uint16_t status;
- trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl));
+ trace_pci_nvme_dif_rw(pract, prinfo);
ctx = g_new0(NvmeBounceContext, 1);
ctx->req = req;
@@ -380,7 +380,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
if (wrz) {
BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;
- if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
+ if (prinfo & NVME_PRINFO_PRCHK_MASK) {
status = NVME_INVALID_PROT_INFO | NVME_DNR;
goto err;
}
@@ -389,7 +389,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
uint8_t *mbuf, *end;
int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
- status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ status = nvme_check_prinfo(ns, prinfo, slba, reftag);
if (status) {
goto err;
}
@@ -469,7 +469,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
}
}
- status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ status = nvme_check_prinfo(ns, prinfo, slba, reftag);
if (status) {
goto err;
}
@@ -478,11 +478,11 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
/* splice generated protection information into the buffer */
nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
ctx->mdata.bounce, ctx->mdata.iov.size,
- apptag, reftag);
+ apptag, &reftag);
} else {
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
- ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
- slba, apptag, appmask, reftag);
+ ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
+ slba, apptag, appmask, &reftag);
if (status) {
goto err;
}
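
The signature changes in dif.c all serve one purpose: the reference tag is
now passed by pointer so that generation and checking advance it in place.
Under protection types 1 and 2 the expected reference tag increments once
per LBA, and a command that is processed in several chunks (the
reimplemented copy command, for instance) must continue counting where the
previous chunk stopped. A minimal sketch of that contract (hypothetical
generate_pi; the real work happens in nvme_dif_pract_generate_dif()):

    #include <assert.h>
    #include <stdint.h>

    /* Each protected LBA consumes one reference tag; the caller's copy
     * is advanced through the pointer. */
    static void generate_pi(unsigned nlb, uint32_t *reftag)
    {
        for (unsigned i = 0; i < nlb; i++) {
            /* dif->reftag = cpu_to_be32(*reftag) in the real code */
            (*reftag)++;
        }
    }

    int main(void)
    {
        uint32_t reftag = 100;    /* initial reftag from the command */

        generate_pi(4, &reftag);  /* first chunk covers tags 100..103 */
        generate_pi(4, &reftag);  /* second chunk continues at 104 */
        assert(reftag == 108);
        return 0;
    }
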
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 992e5a13f5..4275c3db63 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -56,6 +56,7 @@ void nvme_ns_init_format(NvmeNamespace *ns)
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
+ static uint64_t ns_count;
NvmeIdNs *id_ns = &ns->id_ns;
uint8_t ds;
uint16_t ms;
@@ -73,47 +74,47 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->nmic |= NVME_NMIC_NS_SHARED;
}
+ /* Substitute a missing EUI-64 with an autogenerated one */
+ ++ns_count;
+ if (!ns->params.eui64 && ns->params.eui64_default) {
+ ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT;
+ }
+
/* simple copy */
id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
id_ns->mcl = cpu_to_le32(ns->params.mcl);
id_ns->msrc = ns->params.msrc;
+ id_ns->eui64 = cpu_to_be64(ns->params.eui64);
ds = 31 - clz32(ns->blkconf.logical_block_size);
ms = ns->params.ms;
- if (ns->params.ms) {
- id_ns->mc = 0x3;
-
- if (ns->params.mset) {
- id_ns->flbas |= 0x10;
- }
+ id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE;
- id_ns->dpc = 0x1f;
- id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi;
-
- NvmeLBAF lbaf[16] = {
- [0] = { .ds = 9 },
- [1] = { .ds = 9, .ms = 8 },
- [2] = { .ds = 9, .ms = 16 },
- [3] = { .ds = 9, .ms = 64 },
- [4] = { .ds = 12 },
- [5] = { .ds = 12, .ms = 8 },
- [6] = { .ds = 12, .ms = 16 },
- [7] = { .ds = 12, .ms = 64 },
- };
-
- memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
- id_ns->nlbaf = 7;
- } else {
- NvmeLBAF lbaf[16] = {
- [0] = { .ds = 9 },
- [1] = { .ds = 12 },
- };
+ if (ms && ns->params.mset) {
+ id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED;
+ }
- memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
- id_ns->nlbaf = 1;
+ id_ns->dpc = 0x1f;
+ id_ns->dps = ns->params.pi;
+ if (ns->params.pi && ns->params.pil) {
+ id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT;
}
+ static const NvmeLBAF lbaf[16] = {
+ [0] = { .ds = 9 },
+ [1] = { .ds = 9, .ms = 8 },
+ [2] = { .ds = 9, .ms = 16 },
+ [3] = { .ds = 9, .ms = 64 },
+ [4] = { .ds = 12 },
+ [5] = { .ds = 12, .ms = 8 },
+ [6] = { .ds = 12, .ms = 16 },
+ [7] = { .ds = 12, .ms = 64 },
+ };
+
+ memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
+ id_ns->nlbaf = 7;
+
for (i = 0; i <= id_ns->nlbaf; i++) {
NvmeLBAF *lbaf = &id_ns->lbaf[i];
if (lbaf->ds == ds) {
@@ -518,6 +519,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
+ DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),
DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
@@ -538,6 +540,8 @@ static Property nvme_ns_props[] = {
params.max_open_zones, 0),
DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
params.zd_extension_size, 0),
+ DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default,
+ true),
DEFINE_PROP_END_OF_LIST(),
};
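
The autogenerated default combines NVME_EUI64_DEFAULT, whose leading bytes
match the 52:54:00 prefix QEMU traditionally uses for generated MAC
addresses, with a per-process namespace counter, and the result is stored
big-endian in the identify structure. Condensed to a standalone sketch:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

    /* Mirrors the default assignment in nvme_ns_init(): each namespace
     * created in this process gets prefix + counter. */
    static uint64_t default_eui64(void)
    {
        static uint64_t ns_count;
        return NVME_EUI64_DEFAULT + ++ns_count;
    }

    int main(void)
    {
        printf("eui64 #1: 0x%016" PRIx64 "\n", default_eui64());
        printf("eui64 #2: 0x%016" PRIx64 "\n", default_eui64());
        return 0;
    }
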
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 81a35cda14..56f8eceed2 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -26,6 +26,9 @@
#define NVME_MAX_CONTROLLERS 32
#define NVME_MAX_NAMESPACES 256
+#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
+
+QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
@@ -83,6 +86,8 @@ typedef struct NvmeNamespaceParams {
bool shared;
uint32_t nsid;
QemuUUID uuid;
+ uint64_t eui64;
+ bool eui64_default;
uint16_t ms;
uint8_t mset;
@@ -382,6 +387,7 @@ typedef struct NvmeParams {
uint8_t vsl;
bool use_intel_id;
uint8_t zasl;
+ bool auto_transition_zones;
bool legacy_cmb;
} NvmeParams;
@@ -404,6 +410,7 @@ typedef struct NvmeCtrl {
uint32_t max_q_ents;
uint8_t outstanding_aers;
uint32_t irq_status;
+ int cq_pending;
uint64_t host_timestamp; /* Timestamp sent by the host */
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
uint64_t starttime_ms;
@@ -530,17 +537,17 @@ static const uint16_t t10_dif_crc_table[256] = {
0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};
-uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
uint32_t reftag);
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
uint64_t slba);
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
uint8_t *mbuf, size_t mlen, uint16_t apptag,
- uint32_t reftag);
+ uint32_t *reftag);
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
- uint8_t *mbuf, size_t mlen, uint16_t ctrl,
+ uint8_t *mbuf, size_t mlen, uint8_t prinfo,
uint64_t slba, uint16_t apptag,
- uint16_t appmask, uint32_t reftag);
+ uint16_t appmask, uint32_t *reftag);
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index ea33d0ccc3..f9a1f14e26 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -7,16 +7,14 @@ pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64""
-pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
+pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
-pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
-pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
-pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
-pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32""
+pci_nvme_format_set(uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8""
pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
@@ -30,22 +28,20 @@ pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "app
pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
-pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16""
-pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
-pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
-pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_dsm(uint32_t nr, uint32_t attr) "nr %"PRIu32" attr 0x%"PRIx32""
+pci_nvme_dsm_deallocate(uint64_t slba, uint32_t nlb) "slba %"PRIu64" nlb %"PRIu32""
pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32""
pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
@@ -55,7 +51,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid
pci_nvme_identify_ctrl(void) "identify controller"
pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
-pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16""
+pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16""
pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
@@ -80,7 +76,7 @@ pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PR
pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
-pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
+pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16""
pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
@@ -101,6 +97,7 @@ pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%
pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 0ff9ce17a9..527105fafc 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -708,6 +708,14 @@ enum {
#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)
+enum {
+ NVME_PRINFO_PRACT = 1 << 3,
+ NVME_PRINFO_PRCHK_GUARD = 1 << 2,
+ NVME_PRINFO_PRCHK_APP = 1 << 1,
+ NVME_PRINFO_PRCHK_REF = 1 << 0,
+ NVME_PRINFO_PRCHK_MASK = 7 << 0,
+};
+
typedef struct QEMU_PACKED NvmeDsmCmd {
uint8_t opcode;
uint8_t flags;
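
The NVME_PRINFO_* flags operate on the 4-bit value that NVME_RW_PRINFO()
extracts from bits 13:10 of the Read/Write control field, which is why the
dif.c helpers above could switch from the raw uint16_t control word to a
uint8_t prinfo. A self-contained check of the bit layout (macros restated
from this header):

    #include <assert.h>
    #include <stdint.h>

    #define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)

    enum {
        NVME_PRINFO_PRACT       = 1 << 3,
        NVME_PRINFO_PRCHK_GUARD = 1 << 2,
        NVME_PRINFO_PRCHK_APP   = 1 << 1,
        NVME_PRINFO_PRCHK_REF   = 1 << 0,
        NVME_PRINFO_PRCHK_MASK  = 7 << 0,
    };

    int main(void)
    {
        uint16_t control = (NVME_PRINFO_PRACT | NVME_PRINFO_PRCHK_REF) << 10;
        uint8_t prinfo = NVME_RW_PRINFO(control);

        assert(prinfo & NVME_PRINFO_PRACT);          /* strip/generate PI */
        assert(prinfo & NVME_PRINFO_PRCHK_REF);      /* check reference tag */
        assert(!(prinfo & NVME_PRINFO_PRCHK_GUARD)); /* guard check is off */
        assert((prinfo & NVME_PRINFO_PRCHK_MASK) == NVME_PRINFO_PRCHK_REF);
        return 0;
    }
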
@@ -980,6 +988,7 @@ enum NvmeIdCns {
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
+ NVME_ID_CNS_CTRL_LIST = 0x13,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
@@ -1341,6 +1350,15 @@ enum NvmeIdNsDps {
NVME_ID_NS_DPS_FIRST_EIGHT = 8,
};
+enum NvmeIdNsFlbas {
+ NVME_ID_NS_FLBAS_EXTENDED = 1 << 4,
+};
+
+enum NvmeIdNsMc {
+ NVME_ID_NS_MC_EXTENDED = 1 << 0,
+ NVME_ID_NS_MC_SEPARATE = 1 << 1,
+};
+
#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
typedef struct NvmeDifTuple {