aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-07-14 16:51:44 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-07-14 16:51:44 +0100
commitf3947986d9bbbae1087c4c33880d3f8dbf1f1384 (patch)
treef497dabaf32b08d65d298c67ec0d74f67d6b17a2
parent0030ff40472b9ebf0e0595afbc8d7e428218c5d7 (diff)
parente34d8f297d51b7ffa5dce72df1e45fa94cff989c (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches for 2.4.0-rc1 # gpg: Signature made Tue Jul 14 16:15:35 2015 BST using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: rbd: fix ceph settings precedence rbd: make qemu's cache setting override any ceph setting MAINTAINERS: update email address rbd: remove unused constants and fields block: Fix backing file child when modifying graph block: Reorder cleanups in bdrv_close() block: Introduce bdrv_unref_child() block: Introduce bdrv_open_child() block: Move bdrv_attach_child() calls up the call chain nvme: properly report volatile write caches nvme: implement the Flush command Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS2
-rw-r--r--block.c144
-rw-r--r--block/rbd.c64
-rw-r--r--hw/block/nvme.c38
-rw-r--r--hw/block/nvme.h1
-rw-r--r--include/block/block.h7
-rw-r--r--include/block/block_int.h5
7 files changed, 177 insertions, 84 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 411da3cf57..978b7174f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1169,7 +1169,7 @@ S: Supported
F: block/vmdk.c
RBD
-M: Josh Durgin <josh.durgin@inktank.com>
+M: Josh Durgin <jdurgin@redhat.com>
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org
S: Supported
diff --git a/block.c b/block.c
index 5e80336e9f..d088ee02ff 100644
--- a/block.c
+++ b/block.c
@@ -1102,12 +1102,46 @@ static int bdrv_fill_options(QDict **options, const char **pfilename,
return 0;
}
+static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const BdrvChildRole *child_role)
+{
+ BdrvChild *child = g_new(BdrvChild, 1);
+ *child = (BdrvChild) {
+ .bs = child_bs,
+ .role = child_role,
+ };
+
+ QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+
+ return child;
+}
+
+static void bdrv_detach_child(BdrvChild *child)
+{
+ QLIST_REMOVE(child, next);
+ g_free(child);
+}
+
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+{
+ BlockDriverState *child_bs = child->bs;
+
+ if (child->bs->inherits_from == parent) {
+ child->bs->inherits_from = NULL;
+ }
+
+ bdrv_detach_child(child);
+ bdrv_unref(child_bs);
+}
+
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
if (bs->backing_hd) {
assert(bs->backing_blocker);
bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
+ bdrv_detach_child(bs->backing_child);
} else if (backing_hd) {
error_setg(&bs->backing_blocker,
"node is used as backing hd of '%s'",
@@ -1118,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
if (!backing_hd) {
error_free(bs->backing_blocker);
bs->backing_blocker = NULL;
+ bs->backing_child = NULL;
goto out;
}
+ bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
bs->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
@@ -1202,6 +1238,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
error_free(local_err);
goto free_exit;
}
+
bdrv_set_backing_hd(bs, backing_hd);
free_exit:
@@ -1214,7 +1251,7 @@ free_exit:
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
- * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
+ * BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
@@ -1222,21 +1259,21 @@ free_exit:
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
- *
- * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
*/
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState* parent, const BdrvChildRole *child_role,
- bool allow_none, Error **errp)
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent,
+ const BdrvChildRole *child_role,
+ bool allow_none, Error **errp)
{
+ BdrvChild *c = NULL;
+ BlockDriverState *bs;
QDict *image_options;
int ret;
char *bdref_key_dot;
const char *reference;
- assert(pbs);
- assert(*pbs == NULL);
+ assert(child_role != NULL);
bdref_key_dot = g_strdup_printf("%s.", bdref_key);
qdict_extract_subqdict(options, &image_options, bdref_key_dot);
@@ -1244,23 +1281,60 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
reference = qdict_get_try_str(options, bdref_key);
if (!filename && !reference && !qdict_size(image_options)) {
- if (allow_none) {
- ret = 0;
- } else {
+ if (!allow_none) {
error_setg(errp, "A block device must be specified for \"%s\"",
bdref_key);
- ret = -EINVAL;
}
QDECREF(image_options);
goto done;
}
- ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
+ bs = NULL;
+ ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
parent, child_role, NULL, errp);
+ if (ret < 0) {
+ goto done;
+ }
+
+ c = bdrv_attach_child(parent, bs, child_role);
done:
qdict_del(options, bdref_key);
- return ret;
+ return c;
+}
+
+/*
+ * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
+ * a BdrvChild object.
+ *
+ * If allow_none is true, no image will be opened if filename is false and no
+ * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
+ *
+ * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
+ */
+int bdrv_open_image(BlockDriverState **pbs, const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent, const BdrvChildRole *child_role,
+ bool allow_none, Error **errp)
+{
+ Error *local_err = NULL;
+ BdrvChild *c;
+
+ assert(pbs);
+ assert(*pbs == NULL);
+
+ c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
+ allow_none, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
+ }
+
+ if (c != NULL) {
+ *pbs = c->bs;
+ }
+
+ return 0;
}
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
@@ -1328,19 +1402,6 @@ out:
return ret;
}
-static void bdrv_attach_child(BlockDriverState *parent_bs,
- BlockDriverState *child_bs,
- const BdrvChildRole *child_role)
-{
- BdrvChild *child = g_new(BdrvChild, 1);
- *child = (BdrvChild) {
- .bs = child_bs,
- .role = child_role,
- };
-
- QLIST_INSERT_HEAD(&parent_bs->children, child, next);
-}
-
/*
* Opens a disk image (raw, qcow2, vmdk, ...)
*
@@ -1393,9 +1454,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
return -ENODEV;
}
bdrv_ref(bs);
- if (child_role) {
- bdrv_attach_child(parent, bs, child_role);
- }
*pbs = bs;
return 0;
}
@@ -1540,10 +1598,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
goto close_and_fail;
}
- if (child_role) {
- bdrv_attach_child(parent, bs, child_role);
- }
-
QDECREF(options);
*pbs = bs;
return 0;
@@ -1849,20 +1903,23 @@ void bdrv_close(BlockDriverState *bs)
if (bs->drv) {
BdrvChild *child, *next;
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- if (child->bs->inherits_from == bs) {
- child->bs->inherits_from = NULL;
- }
- QLIST_REMOVE(child, next);
- g_free(child);
- }
+ bs->drv->bdrv_close(bs);
if (bs->backing_hd) {
BlockDriverState *backing_hd = bs->backing_hd;
bdrv_set_backing_hd(bs, NULL);
bdrv_unref(backing_hd);
}
- bs->drv->bdrv_close(bs);
+
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+ /* TODO Remove bdrv_unref() from drivers' close function and use
+ * bdrv_unref_child() here */
+ if (child->bs->inherits_from == bs) {
+ child->bs->inherits_from = NULL;
+ }
+ bdrv_detach_child(child);
+ }
+
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
@@ -2116,7 +2173,6 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
/* The contents of 'tmp' will become bs_top, as we are
* swapping bs_new and bs_top contents. */
bdrv_set_backing_hd(bs_top, bs_new);
- bdrv_attach_child(bs_top, bs_new, &child_backing);
}
static void bdrv_delete(BlockDriverState *bs)
diff --git a/block/rbd.c b/block/rbd.c
index fbe87e035b..a60a19d58d 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -74,25 +74,18 @@ typedef struct RBDAIOCB {
QEMUIOVector *qiov;
char *bounce;
RBDAIOCmd cmd;
- int64_t sector_num;
int error;
struct BDRVRBDState *s;
- int status;
} RBDAIOCB;
typedef struct RADOSCB {
- int rcbid;
RBDAIOCB *acb;
struct BDRVRBDState *s;
- int done;
int64_t size;
char *buf;
int64_t ret;
} RADOSCB;
-#define RBD_FD_READ 0
-#define RBD_FD_WRITE 1
-
typedef struct BDRVRBDState {
rados_t cluster;
rados_ioctx_t io_ctx;
@@ -235,7 +228,9 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
+ bool only_read_conf_file,
+ Error **errp)
{
char *p, *buf;
char name[RBD_MAX_CONF_NAME_SIZE];
@@ -267,14 +262,18 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
qemu_rbd_unescape(value);
if (strcmp(name, "conf") == 0) {
- ret = rados_conf_read_file(cluster, value);
- if (ret < 0) {
- error_setg(errp, "error reading conf file %s", value);
- break;
+ /* read the conf file alone, so it doesn't override more
+ specific settings for a particular device */
+ if (only_read_conf_file) {
+ ret = rados_conf_read_file(cluster, value);
+ if (ret < 0) {
+ error_setg(errp, "error reading conf file %s", value);
+ break;
+ }
}
} else if (strcmp(name, "id") == 0) {
/* ignore, this is parsed by qemu_rbd_parse_clientname() */
- } else {
+ } else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
error_setg(errp, "invalid conf option %s", name);
@@ -337,10 +336,15 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(cluster, NULL);
+ } else if (conf[0] != '\0' &&
+ qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
+ rados_shutdown(cluster);
+ error_propagate(errp, local_err);
+ return -EIO;
}
if (conf[0] != '\0' &&
- qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
+ qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
rados_shutdown(cluster);
error_propagate(errp, local_err);
return -EIO;
@@ -405,7 +409,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
- acb->status = 0;
qemu_aio_unref(acb);
}
@@ -468,6 +471,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
s->snap = g_strdup(snap_buf);
}
+ if (strstr(conf, "conf=") == NULL) {
+ /* try default location, but ignore failure */
+ rados_conf_read_file(s->cluster, NULL);
+ } else if (conf[0] != '\0') {
+ r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+ if (r < 0) {
+ goto failed_shutdown;
+ }
+ }
+
+ if (conf[0] != '\0') {
+ r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+ if (r < 0) {
+ goto failed_shutdown;
+ }
+ }
+
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@@ -481,18 +501,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
rados_conf_set(s->cluster, "rbd_cache", "true");
}
- if (strstr(conf, "conf=") == NULL) {
- /* try default location, but ignore failure */
- rados_conf_read_file(s->cluster, NULL);
- }
-
- if (conf[0] != '\0') {
- r = qemu_rbd_set_conf(s->cluster, conf, errp);
- if (r < 0) {
- goto failed_shutdown;
- }
- }
-
r = rados_connect(s->cluster);
if (r < 0) {
error_setg(errp, "error connecting");
@@ -621,7 +629,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->error = 0;
acb->s = s;
acb->bh = NULL;
- acb->status = -EINPROGRESS;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -633,7 +640,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
size = nb_sectors * BDRV_SECTOR_SIZE;
rcb = g_new(RADOSCB, 1);
- rcb->done = 0;
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index c6a6a0e49a..40d4880326 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -207,11 +207,23 @@ static void nvme_rw_cb(void *opaque, int ret)
} else {
req->status = NVME_INTERNAL_DEV_ERROR;
}
-
- qemu_sglist_destroy(&req->qsg);
+ if (req->has_sg) {
+ qemu_sglist_destroy(&req->qsg);
+ }
nvme_enqueue_req_completion(cq, req);
}
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
+ NvmeRequest *req)
+{
+ req->has_sg = false;
+ block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
+
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
NvmeRequest *req)
{
@@ -235,6 +247,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
}
assert((nlb << data_shift) == req->qsg.size);
+ req->has_sg = true;
dma_acct_start(n->conf.blk, &req->acct, &req->qsg,
is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
req->aiocb = is_write ?
@@ -256,7 +269,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
ns = &n->namespaces[nsid - 1];
switch (cmd->opcode) {
case NVME_CMD_FLUSH:
- return NVME_SUCCESS;
+ return nvme_flush(n, ns, cmd, req);
case NVME_CMD_WRITE:
case NVME_CMD_READ:
return nvme_rw(n, ns, cmd, req);
@@ -474,26 +487,32 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t result;
switch (dw10) {
- case NVME_NUMBER_OF_QUEUES:
- req->cqe.result =
- cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
- break;
case NVME_VOLATILE_WRITE_CACHE:
- req->cqe.result = cpu_to_le32(1);
+ result = blk_enable_write_cache(n->conf.blk);
+ break;
+ case NVME_NUMBER_OF_QUEUES:
+ result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
break;
default:
return NVME_INVALID_FIELD | NVME_DNR;
}
+
+ req->cqe.result = result;
return NVME_SUCCESS;
}
static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t dw11 = le32_to_cpu(cmd->cdw11);
switch (dw10) {
+ case NVME_VOLATILE_WRITE_CACHE:
+ blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
+ break;
case NVME_NUMBER_OF_QUEUES:
req->cqe.result =
cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
@@ -818,6 +837,9 @@ static int nvme_init(PCIDevice *pci_dev)
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
+ if (blk_enable_write_cache(n->conf.blk)) {
+ id->vwc = 1;
+ }
n->bar.cap = 0;
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index b6ccb655a6..bf3a3ccac8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -638,6 +638,7 @@ typedef struct NvmeRequest {
struct NvmeSQueue *sq;
BlockAIOCB *aiocb;
uint16_t status;
+ bool has_sg;
NvmeCqe cqe;
BlockAcctCookie acct;
QEMUSGList qsg;
diff --git a/include/block/block.h b/include/block/block.h
index 06e4137008..37916f7208 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -12,6 +12,7 @@
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BlockJob BlockJob;
+typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildRole BdrvChildRole;
typedef struct BlockDriverInfo {
@@ -208,6 +209,11 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent, const BdrvChildRole *child_role,
bool allow_none, Error **errp);
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent,
+ const BdrvChildRole *child_role,
+ bool allow_none, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
@@ -507,6 +513,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_ref(BlockDriverState *bs);
void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8996baf2f0..14ad4c334b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -335,11 +335,11 @@ struct BdrvChildRole {
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
-typedef struct BdrvChild {
+struct BdrvChild {
BlockDriverState *bs;
const BdrvChildRole *role;
QLIST_ENTRY(BdrvChild) next;
-} BdrvChild;
+};
/*
* Note: the function bdrv_append() copies and swaps contents of
@@ -379,6 +379,7 @@ struct BlockDriverState {
char exact_filename[PATH_MAX];
BlockDriverState *backing_hd;
+ BdrvChild *backing_child;
BlockDriverState *file;
NotifierList close_notifiers;