aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2012-09-25 16:06:16 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2012-09-25 16:06:16 -0500
commit444dbc381b94f5b54da521df61e751f28b00ce88 (patch)
tree81801cec4db20dfd55d32159b6ae673244c4fd57
parent3988475b9b7fa251b00a29b076761d8c1c7e64dc (diff)
parentdc1c13d96912731d4c7c7e13d31c93b8735f1203 (diff)
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony: block: remove keep_read_only flag from BlockDriverState struct block: convert bdrv_commit() to use bdrv_reopen() block: vpc image file reopen block: vdi image file reopen block: vmdk image file reopen block: qcow image file reopen block: qcow2 image file reopen block: qed image file reopen block: raw image file reopen block: raw-posix image file reopen block: purge s->aligned_buf and s->aligned_buf_size from raw-posix.c block: use BDRV_O_NOCACHE instead of s->aligned_buf in raw-posix.c block: do not parse BDRV_O_CACHE_WB in block drivers block: move open flag parsing in raw block drivers to helper functions block: move aio initialization into a helper function block: Framework for reopening files safely block: make bdrv_set_enable_write_cache() modify open_flags block: correctly set the keep_read_only flag blockdev: preserve readonly and snapshot states across media changes
-rw-r--r--block.c299
-rw-r--r--block.h18
-rw-r--r--block/iscsi.c4
-rw-r--r--block/qcow.c10
-rw-r--r--block/qcow2.c10
-rw-r--r--block/qed.c9
-rw-r--r--block/raw-posix.c225
-rw-r--r--block/raw-win32.c40
-rw-r--r--block/raw.c10
-rw-r--r--block/rbd.c6
-rw-r--r--block/sheepdog.c14
-rw-r--r--block/vdi.c7
-rw-r--r--block/vmdk.c35
-rw-r--r--block/vpc.c7
-rw-r--r--block_int.h9
-rw-r--r--blockdev.c2
16 files changed, 563 insertions, 142 deletions
diff --git a/block.c b/block.c
index e78039bd5a..751ebdc06b 100644
--- a/block.c
+++ b/block.c
@@ -668,7 +668,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
open_flags |= BDRV_O_RDWR;
}
- bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
+ bs->read_only = !(open_flags & BDRV_O_RDWR);
/* Open the image, either directly or using a protocol */
if (drv->bdrv_file_open) {
@@ -808,6 +808,10 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
goto unlink_and_fail;
}
+ if (flags & BDRV_O_RDWR) {
+ flags |= BDRV_O_ALLOW_RDWR;
+ }
+
/* Open the image */
ret = bdrv_open_common(bs, filename, flags, drv);
if (ret < 0) {
@@ -837,12 +841,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
bdrv_close(bs);
return ret;
}
- if (bs->is_temporary) {
- bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
- } else {
- /* base image inherits from "parent" */
- bs->backing_hd->keep_read_only = bs->keep_read_only;
- }
}
if (!bdrv_key_required(bs)) {
@@ -863,6 +861,238 @@ unlink_and_fail:
return ret;
}
+typedef struct BlockReopenQueueEntry {
+ bool prepared;
+ BDRVReopenState state;
+ QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
+} BlockReopenQueueEntry;
+
+/*
+ * Adds a BlockDriverState to a simple queue for an atomic, transactional
+ * reopen of multiple devices.
+ *
+ * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
+ * already performed, or alternatively may be NULL a new BlockReopenQueue will
+ * be created and initialized. This newly created BlockReopenQueue should be
+ * passed back in for subsequent calls that are intended to be of the same
+ * atomic 'set'.
+ *
+ * bs is the BlockDriverState to add to the reopen queue.
+ *
+ * flags contains the open flags for the associated bs
+ *
+ * returns a pointer to bs_queue, which is either the newly allocated
+ * bs_queue, or the existing bs_queue being used.
+ *
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags)
+{
+ assert(bs != NULL);
+
+ BlockReopenQueueEntry *bs_entry;
+ if (bs_queue == NULL) {
+ bs_queue = g_new0(BlockReopenQueue, 1);
+ QSIMPLEQ_INIT(bs_queue);
+ }
+
+ if (bs->file) {
+ bdrv_reopen_queue(bs_queue, bs->file, flags);
+ }
+
+ bs_entry = g_new0(BlockReopenQueueEntry, 1);
+ QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+
+ bs_entry->state.bs = bs;
+ bs_entry->state.flags = flags;
+
+ return bs_queue;
+}
+
+/*
+ * Reopen multiple BlockDriverStates atomically & transactionally.
+ *
+ * The queue passed in (bs_queue) must have been built up previous
+ * via bdrv_reopen_queue().
+ *
+ * Reopens all BDS specified in the queue, with the appropriate
+ * flags. All devices are prepared for reopen, and failure of any
+ * device will cause all device changes to be abandonded, and intermediate
+ * data cleaned up.
+ *
+ * If all devices prepare successfully, then the changes are committed
+ * to all devices.
+ *
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+ int ret = -1;
+ BlockReopenQueueEntry *bs_entry, *next;
+ Error *local_err = NULL;
+
+ assert(bs_queue != NULL);
+
+ bdrv_drain_all();
+
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
+ error_propagate(errp, local_err);
+ goto cleanup;
+ }
+ bs_entry->prepared = true;
+ }
+
+ /* If we reach this point, we have success and just need to apply the
+ * changes
+ */
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ bdrv_reopen_commit(&bs_entry->state);
+ }
+
+ ret = 0;
+
+cleanup:
+ QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ if (ret && bs_entry->prepared) {
+ bdrv_reopen_abort(&bs_entry->state);
+ }
+ g_free(bs_entry);
+ }
+ g_free(bs_queue);
+ return ret;
+}
+
+
+/* Reopen a single BlockDriverState with the specified flags. */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+
+ ret = bdrv_reopen_multiple(queue, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ }
+ return ret;
+}
+
+
+/*
+ * Prepares a BlockDriverState for reopen. All changes are staged in the
+ * 'opaque' field of the BDRVReopenState, which is used and allocated by
+ * the block driver layer .bdrv_reopen_prepare()
+ *
+ * bs is the BlockDriverState to reopen
+ * flags are the new open flags
+ * queue is the reopen queue
+ *
+ * Returns 0 on success, non-zero on error. On error errp will be set
+ * as well.
+ *
+ * On failure, bdrv_reopen_abort() will be called to clean up any data.
+ * It is the responsibility of the caller to then call the abort() or
+ * commit() for any other BDS that have been left in a prepare() state
+ *
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+ Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ assert(reopen_state->bs->drv != NULL);
+ drv = reopen_state->bs->drv;
+
+ /* if we are to stay read-only, do not allow permission change
+ * to r/w */
+ if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
+ reopen_state->flags & BDRV_O_RDWR) {
+ error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+ reopen_state->bs->device_name);
+ goto error;
+ }
+
+
+ ret = bdrv_flush(reopen_state->bs);
+ if (ret) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
+ strerror(-ret));
+ goto error;
+ }
+
+ if (drv->bdrv_reopen_prepare) {
+ ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
+ if (ret) {
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ } else {
+ error_set(errp, QERR_OPEN_FILE_FAILED,
+ reopen_state->bs->filename);
+ }
+ goto error;
+ }
+ } else {
+ /* It is currently mandatory to have a bdrv_reopen_prepare()
+ * handler for each supported drv. */
+ error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+ drv->format_name, reopen_state->bs->device_name,
+ "reopening of file");
+ ret = -1;
+ goto error;
+ }
+
+ ret = 0;
+
+error:
+ return ret;
+}
+
+/*
+ * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
+ * makes them final by swapping the staging BlockDriverState contents into
+ * the active BlockDriverState contents.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ /* If there are any driver level actions to take */
+ if (drv->bdrv_reopen_commit) {
+ drv->bdrv_reopen_commit(reopen_state);
+ }
+
+ /* set BDS specific flags now */
+ reopen_state->bs->open_flags = reopen_state->flags;
+ reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
+ BDRV_O_CACHE_WB);
+ reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+}
+
+/*
+ * Abort the reopen, and delete and free the staged changes in
+ * reopen_state
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ if (drv->bdrv_reopen_abort) {
+ drv->bdrv_reopen_abort(reopen_state);
+ }
+}
+
+
void bdrv_close(BlockDriverState *bs)
{
bdrv_flush(bs);
@@ -1269,13 +1499,11 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
int bdrv_commit(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
- BlockDriver *backing_drv;
int64_t sector, total_sectors;
int n, ro, open_flags;
- int ret = 0, rw_ret = 0;
+ int ret = 0;
uint8_t *buf;
char filename[1024];
- BlockDriverState *bs_rw, *bs_ro;
if (!drv)
return -ENOMEDIUM;
@@ -1284,42 +1512,18 @@ int bdrv_commit(BlockDriverState *bs)
return -ENOTSUP;
}
- if (bs->backing_hd->keep_read_only) {
- return -EACCES;
- }
-
if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
return -EBUSY;
}
- backing_drv = bs->backing_hd->drv;
ro = bs->backing_hd->read_only;
strncpy(filename, bs->backing_hd->filename, sizeof(filename));
open_flags = bs->backing_hd->open_flags;
if (ro) {
- /* re-open as RW */
- bdrv_delete(bs->backing_hd);
- bs->backing_hd = NULL;
- bs_rw = bdrv_new("");
- rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
- backing_drv);
- if (rw_ret < 0) {
- bdrv_delete(bs_rw);
- /* try to re-open read-only */
- bs_ro = bdrv_new("");
- ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
- backing_drv);
- if (ret < 0) {
- bdrv_delete(bs_ro);
- /* drive not functional anymore */
- bs->drv = NULL;
- return ret;
- }
- bs->backing_hd = bs_ro;
- return rw_ret;
+ if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+ return -EACCES;
}
- bs->backing_hd = bs_rw;
}
total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
@@ -1356,20 +1560,8 @@ ro_cleanup:
g_free(buf);
if (ro) {
- /* re-open as RO */
- bdrv_delete(bs->backing_hd);
- bs->backing_hd = NULL;
- bs_ro = bdrv_new("");
- ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
- backing_drv);
- if (ret < 0) {
- bdrv_delete(bs_ro);
- /* drive not functional anymore */
- bs->drv = NULL;
- return ret;
- }
- bs->backing_hd = bs_ro;
- bs->backing_hd->keep_read_only = 0;
+ /* ignoring error return here */
+ bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
}
return ret;
@@ -2168,6 +2360,13 @@ int bdrv_enable_write_cache(BlockDriverState *bs)
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
bs->enable_write_cache = wce;
+
+ /* so a reopen() will preserve wce */
+ if (wce) {
+ bs->open_flags |= BDRV_O_CACHE_WB;
+ } else {
+ bs->open_flags &= ~BDRV_O_CACHE_WB;
+ }
}
int bdrv_is_encrypted(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 2e2be11071..b1095d8599 100644
--- a/block.h
+++ b/block.h
@@ -80,6 +80,7 @@ typedef struct BlockDevOps {
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
@@ -96,6 +97,15 @@ typedef enum {
BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
} BlockQMPEventAction;
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+
void bdrv_iostatus_enable(BlockDriverState *bs);
void bdrv_iostatus_reset(BlockDriverState *bs);
void bdrv_iostatus_disable(BlockDriverState *bs);
@@ -130,6 +140,14 @@ int bdrv_parse_cache_flags(const char *mode, int *flags);
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
void bdrv_close(BlockDriverState *bs);
int bdrv_attach_dev(BlockDriverState *bs, void *dev);
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
diff --git a/block/iscsi.c b/block/iscsi.c
index fb001b955c..d0b1a10ee4 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -262,10 +262,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
acb->task->xfer_dir = SCSI_XFER_WRITE;
acb->task->cdb_size = 16;
acb->task->cdb[0] = 0x8a;
- if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
- /* set FUA on writes when cache mode is write through */
- acb->task->cdb[1] |= 0x04;
- }
lba = sector_qemu2lun(sector_num, iscsilun);
*(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32);
*(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff);
diff --git a/block/qcow.c b/block/qcow.c
index 7b5ab87d2d..b239c82ae0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -197,6 +197,15 @@ static int qcow_open(BlockDriverState *bs, int flags)
return ret;
}
+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static int qcow_set_key(BlockDriverState *bs, const char *key)
{
BDRVQcowState *s = bs->opaque;
@@ -868,6 +877,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
+ .bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_co_readv = qcow_co_readv,
diff --git a/block/qcow2.c b/block/qcow2.c
index 8f183f1465..aa5e603cd3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -52,6 +52,7 @@ typedef struct {
uint32_t magic;
uint32_t len;
} QCowExtension;
+
#define QCOW2_EXT_MAGIC_END 0
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
@@ -558,6 +559,14 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
return 0;
}
+/* We have nothing to do for QCOW2 reopen, stubs just return
+ * success */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
@@ -1679,6 +1688,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_probe = qcow2_probe,
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
+ .bdrv_reopen_prepare = qcow2_reopen_prepare,
.bdrv_create = qcow2_create,
.bdrv_co_is_allocated = qcow2_co_is_allocated,
.bdrv_set_key = qcow2_set_key,
diff --git a/block/qed.c b/block/qed.c
index 21cb239870..6c182ca917 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -505,6 +505,14 @@ out:
return ret;
}
+/* We have nothing to do for QED reopen, stubs just return
+ * success */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -1564,6 +1572,7 @@ static BlockDriver bdrv_qed = {
.bdrv_rebind = bdrv_qed_rebind,
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
+ .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6be20b1925..28d439fa81 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -133,13 +133,19 @@ typedef struct BDRVRawState {
int use_aio;
void *aio_ctx;
#endif
- uint8_t *aligned_buf;
- unsigned aligned_buf_size;
#ifdef CONFIG_XFS
bool is_xfs : 1;
#endif
} BDRVRawState;
+typedef struct BDRVRawReopenState {
+ int fd;
+ int open_flags;
+#ifdef CONFIG_LINUX_AIO
+ int use_aio;
+#endif
+} BDRVRawReopenState;
+
static int fd_open(BlockDriverState *bs);
static int64_t raw_getlength(BlockDriverState *bs);
@@ -185,6 +191,57 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
+static void raw_parse_flags(int bdrv_flags, int *open_flags)
+{
+ assert(open_flags != NULL);
+
+ *open_flags |= O_BINARY;
+ *open_flags &= ~O_ACCMODE;
+ if (bdrv_flags & BDRV_O_RDWR) {
+ *open_flags |= O_RDWR;
+ } else {
+ *open_flags |= O_RDONLY;
+ }
+
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((bdrv_flags & BDRV_O_NOCACHE)) {
+ *open_flags |= O_DIRECT;
+ }
+}
+
+#ifdef CONFIG_LINUX_AIO
+static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
+{
+ int ret = -1;
+ assert(aio_ctx != NULL);
+ assert(use_aio != NULL);
+ /*
+ * Currently Linux do AIO only for files opened with O_DIRECT
+ * specified so check NOCACHE flag too
+ */
+ if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
+
+ /* if non-NULL, laio_init() has already been run */
+ if (*aio_ctx == NULL) {
+ *aio_ctx = laio_init();
+ if (!*aio_ctx) {
+ goto error;
+ }
+ }
+ *use_aio = 1;
+ } else {
+ *use_aio = 0;
+ }
+
+ ret = 0;
+
+error:
+ return ret;
+}
+#endif
+
static int raw_open_common(BlockDriverState *bs, const char *filename,
int bdrv_flags, int open_flags)
{
@@ -196,20 +253,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
return ret;
}
- s->open_flags = open_flags | O_BINARY;
- s->open_flags &= ~O_ACCMODE;
- if (bdrv_flags & BDRV_O_RDWR) {
- s->open_flags |= O_RDWR;
- } else {
- s->open_flags |= O_RDONLY;
- }
-
- /* Use O_DSYNC for write-through caching, no flags for write-back caching,
- * and O_DIRECT for no caching. */
- if ((bdrv_flags & BDRV_O_NOCACHE))
- s->open_flags |= O_DIRECT;
- if (!(bdrv_flags & BDRV_O_CACHE_WB))
- s->open_flags |= O_DSYNC;
+ s->open_flags = open_flags;
+ raw_parse_flags(bdrv_flags, &s->open_flags);
s->fd = -1;
fd = qemu_open(filename, s->open_flags, 0644);
@@ -220,45 +265,17 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
return ret;
}
s->fd = fd;
- s->aligned_buf = NULL;
-
- if ((bdrv_flags & BDRV_O_NOCACHE)) {
- /*
- * Allocate a buffer for read/modify/write cycles. Chose the size
- * pessimistically as we don't know the block size yet.
- */
- s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
- s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
- if (s->aligned_buf == NULL) {
- goto out_close;
- }
- }
/* We're falling back to POSIX AIO in some cases so init always */
if (paio_init() < 0) {
- goto out_free_buf;
+ goto out_close;
}
#ifdef CONFIG_LINUX_AIO
- /*
- * Currently Linux do AIO only for files opened with O_DIRECT
- * specified so check NOCACHE flag too
- */
- if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
- (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
- s->aio_ctx = laio_init();
- if (!s->aio_ctx) {
- goto out_free_buf;
- }
- s->use_aio = 1;
- } else
-#endif
- {
-#ifdef CONFIG_LINUX_AIO
- s->use_aio = 0;
-#endif
+ if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
+ goto out_close;
}
+#endif
#ifdef CONFIG_XFS
if (platform_test_xfs_fd(s->fd)) {
@@ -268,8 +285,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
return 0;
-out_free_buf:
- qemu_vfree(s->aligned_buf);
out_close:
qemu_close(fd);
return -errno;
@@ -283,6 +298,109 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
return raw_open_common(bs, filename, flags, 0);
}
+static int raw_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ BDRVRawState *s;
+ BDRVRawReopenState *raw_s;
+ int ret = 0;
+
+ assert(state != NULL);
+ assert(state->bs != NULL);
+
+ s = state->bs->opaque;
+
+ state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
+ raw_s = state->opaque;
+
+#ifdef CONFIG_LINUX_AIO
+ raw_s->use_aio = s->use_aio;
+
+ /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
+ * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
+ * won't override aio_ctx if aio_ctx is non-NULL */
+ if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
+ return -1;
+ }
+#endif
+
+ raw_parse_flags(state->flags, &raw_s->open_flags);
+
+ raw_s->fd = -1;
+
+ int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
+#ifdef O_NOATIME
+ fcntl_flags |= O_NOATIME;
+#endif
+
+ if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+ /* dup the original fd */
+ /* TODO: use qemu fcntl wrapper */
+#ifdef F_DUPFD_CLOEXEC
+ raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
+#else
+ raw_s->fd = dup(s->fd);
+ if (raw_s->fd != -1) {
+ qemu_set_cloexec(raw_s->fd);
+ }
+#endif
+ if (raw_s->fd >= 0) {
+ ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+ if (ret) {
+ qemu_close(raw_s->fd);
+ raw_s->fd = -1;
+ }
+ }
+ }
+
+ /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+ if (raw_s->fd == -1) {
+ assert(!(raw_s->open_flags & O_CREAT));
+ raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
+ if (raw_s->fd == -1) {
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
+
+ s->open_flags = raw_s->open_flags;
+
+ qemu_close(s->fd);
+ s->fd = raw_s->fd;
+#ifdef CONFIG_LINUX_AIO
+ s->use_aio = raw_s->use_aio;
+#endif
+
+ g_free(state->opaque);
+ state->opaque = NULL;
+}
+
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+
+ /* nothing to do if NULL, we didn't get far enough */
+ if (raw_s == NULL) {
+ return;
+ }
+
+ if (raw_s->fd >= 0) {
+ qemu_close(raw_s->fd);
+ raw_s->fd = -1;
+ }
+ g_free(state->opaque);
+ state->opaque = NULL;
+}
+
+
/* XXX: use host sector size if necessary with:
#ifdef DIOCGSECTORSIZE
{
@@ -330,7 +448,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
* boundary. Check if this is the case or tell the low-level
* driver that it needs to copy the buffer.
*/
- if (s->aligned_buf) {
+ if ((bs->open_flags & BDRV_O_NOCACHE)) {
if (!qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
@@ -378,8 +496,6 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
- if (s->aligned_buf != NULL)
- qemu_vfree(s->aligned_buf);
}
}
@@ -735,6 +851,9 @@ static BlockDriver bdrv_file = {
.instance_size = sizeof(BDRVRawState),
.bdrv_probe = NULL, /* no probe for protocols */
.bdrv_file_open = raw_open,
+ .bdrv_reopen_prepare = raw_reopen_prepare,
+ .bdrv_reopen_commit = raw_reopen_commit,
+ .bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_co_discard = raw_co_discard,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index c56bf83375..78c830648b 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -77,6 +77,23 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
+{
+ assert(access_flags != NULL);
+ assert(overlapped != NULL);
+
+ if (flags & BDRV_O_RDWR) {
+ *access_flags = GENERIC_READ | GENERIC_WRITE;
+ } else {
+ *access_flags = GENERIC_READ;
+ }
+
+ *overlapped = FILE_ATTRIBUTE_NORMAL;
+ if (flags & BDRV_O_NOCACHE) {
+ *overlapped |= FILE_FLAG_NO_BUFFERING;
+ }
+}
+
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
@@ -85,17 +102,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
s->type = FTYPE_FILE;
- if (flags & BDRV_O_RDWR) {
- access_flags = GENERIC_READ | GENERIC_WRITE;
- } else {
- access_flags = GENERIC_READ;
- }
+ raw_parse_flags(flags, &access_flags, &overlapped);
- overlapped = FILE_ATTRIBUTE_NORMAL;
- if (flags & BDRV_O_NOCACHE)
- overlapped |= FILE_FLAG_NO_BUFFERING;
- if (!(flags & BDRV_O_CACHE_WB))
- overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -374,18 +382,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
}
s->type = find_device_type(bs, filename);
- if (flags & BDRV_O_RDWR) {
- access_flags = GENERIC_READ | GENERIC_WRITE;
- } else {
- access_flags = GENERIC_READ;
- }
+ raw_parse_flags(flags, &access_flags, &overlapped);
+
create_flags = OPEN_EXISTING;
- overlapped = FILE_ATTRIBUTE_NORMAL;
- if (flags & BDRV_O_NOCACHE)
- overlapped |= FILE_FLAG_NO_BUFFERING;
- if (!(flags & BDRV_O_CACHE_WB))
- overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
diff --git a/block/raw.c b/block/raw.c
index ff34ea41e7..253e949b8c 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -9,6 +9,14 @@ static int raw_open(BlockDriverState *bs, int flags)
return 0;
}
+/* We have nothing to do for raw reopen, stubs just return
+ * success */
+static int raw_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
@@ -115,6 +123,8 @@ static BlockDriver bdrv_raw = {
.bdrv_open = raw_open,
.bdrv_close = raw_close,
+ .bdrv_reopen_prepare = raw_reopen_prepare,
+
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
diff --git a/block/rbd.c b/block/rbd.c
index 5a0f79fc8f..015a9db0ad 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -487,12 +487,6 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
rados_conf_set(s->cluster, "rbd_cache", "false");
} else {
rados_conf_set(s->cluster, "rbd_cache", "true");
- if (!(flags & BDRV_O_CACHE_WB)) {
- r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0");
- if (r < 0) {
- rados_conf_set(s->cluster, "rbd_cache", "false");
- }
- }
}
if (strstr(conf, "conf=") == NULL) {
diff --git a/block/sheepdog.c b/block/sheepdog.c
index e0753ee9e5..4742f8ae6f 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1114,14 +1114,12 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
- if (flags & BDRV_O_CACHE_WB) {
- s->cache_enabled = 1;
- s->flush_fd = connect_to_sdog(s->addr, s->port);
- if (s->flush_fd < 0) {
- error_report("failed to connect");
- ret = s->flush_fd;
- goto out;
- }
+ s->cache_enabled = 1;
+ s->flush_fd = connect_to_sdog(s->addr, s->port);
+ if (s->flush_fd < 0) {
+ error_report("failed to connect");
+ ret = s->flush_fd;
+ goto out;
}
if (snapid || tag[0] != '\0') {
diff --git a/block/vdi.c b/block/vdi.c
index 550cf58a39..f35b12ec98 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -454,6 +454,12 @@ static int vdi_open(BlockDriverState *bs, int flags)
return -1;
}
+static int vdi_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
@@ -761,6 +767,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_probe = vdi_probe,
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
+ .bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_create = vdi_create,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
diff --git a/block/vmdk.c b/block/vmdk.c
index bba4c61a74..f2e861b074 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -300,6 +300,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
return 1;
}
+/* Queue extents, if any, for reopen() */
+static int vmdk_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ BDRVVmdkState *s;
+ int ret = -1;
+ int i;
+ VmdkExtent *e;
+
+ assert(state != NULL);
+ assert(state->bs != NULL);
+
+ if (queue == NULL) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "No reopen queue for VMDK extents");
+ goto exit;
+ }
+
+ s = state->bs->opaque;
+
+ assert(s != NULL);
+
+ for (i = 0; i < s->num_extents; i++) {
+ e = &s->extents[i];
+ if (e->file != state->bs->file) {
+ bdrv_reopen_queue(queue, e->file, state->flags);
+ }
+ }
+ ret = 0;
+
+exit:
+ return ret;
+}
+
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
@@ -1646,6 +1680,7 @@ static BlockDriver bdrv_vmdk = {
.instance_size = sizeof(BDRVVmdkState),
.bdrv_probe = vmdk_probe,
.bdrv_open = vmdk_open,
+ .bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_read = vmdk_co_read,
.bdrv_write = vmdk_co_write,
.bdrv_close = vmdk_close,
diff --git a/block/vpc.c b/block/vpc.c
index c0b82c4f57..b6bf52f140 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -265,6 +265,12 @@ static int vpc_open(BlockDriverState *bs, int flags)
return err;
}
+static int vpc_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
/*
* Returns the absolute byte offset of the given sector in the image file.
* If the sector is not allocated, -1 is returned instead.
@@ -783,6 +789,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_probe = vpc_probe,
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
+ .bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
.bdrv_read = vpc_co_read,
diff --git a/block_int.h b/block_int.h
index 4452f6f398..ac4245cb18 100644
--- a/block_int.h
+++ b/block_int.h
@@ -139,6 +139,13 @@ struct BlockDriver {
int instance_size;
int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
int (*bdrv_probe_device)(const char *filename);
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
int (*bdrv_open)(BlockDriverState *bs, int flags);
int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
@@ -268,7 +275,6 @@ struct BlockDriverState {
int64_t total_sectors; /* if we are reading a disk image, give its
size in sectors */
int read_only; /* if true, the media is read only */
- int keep_read_only; /* if true, the media was requested to stay read only */
int open_flags; /* flags used to open the file, re-used for re-open */
int encrypted; /* if true, the media is encrypted */
int valid_key; /* if true, a valid encryption key has been set */
@@ -336,6 +342,7 @@ struct BlockDriverState {
/* long-running background operation */
BlockJob *job;
+
};
int get_tmp_filename(char *filename, int size);
diff --git a/blockdev.c b/blockdev.c
index 7c83baa353..e5d450f0bb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -527,6 +527,8 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
if_name[type], mediastr, unit_id);
}
dinfo->bdrv = bdrv_new(dinfo->id);
+ dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
+ dinfo->bdrv->read_only = ro;
dinfo->devaddr = devaddr;
dinfo->type = type;
dinfo->bus = bus_id;