about | summary | refs | log | tree | commit | diff
path: root/block
diff options
context:
space:
mode:
author Anthony Liguori <aliguori@us.ibm.com> 2012-04-23 14:27:04 -0500
committer Anthony Liguori <aliguori@us.ibm.com> 2012-04-23 14:27:04 -0500
commit 1f8bcac09af61e58c5121aa0a932190700ad554d (patch)
tree 4bb240289095295eb5b3806d6347cd866a55a542 /block
parent cb4c2548ea7cceef7260465773c6b8e634c186d4 (diff)
parent 1042ec94b19e0bfaae74c912ebbdfdbff8dd7db2 (diff)
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony: (38 commits)
  qemu-iotests: Fix test 031 for qcow2 v3 support
  qemu-iotests: Add -o and make v3 the default for qcow2
  qcow2: Zero write support
  qemu-iotests: Test backing file COW with zero clusters
  qemu-iotests: add a simple test for write_zeroes
  qcow2: Support for feature table header extension
  qcow2: Support reading zero clusters
  qcow2: Version 3 images
  qcow2: Ignore reserved bits in check_refcounts
  qcow2: Ignore reserved bits in refcount table entries
  qcow2: Simplify count_cow_clusters
  qcow2: Refactor qcow2_free_any_clusters
  qcow2: Ignore reserved bits in L1/L2 entries
  qcow2: Fail write_compressed when overwriting data
  qcow2: Ignore reserved bits in count_contiguous_clusters()
  qcow2: Ignore reserved bits in get_cluster_offset
  qcow2: Save disk size in snapshot header
  Specification for qcow2 version 3
  qcow2: Fix refcount block allocation during qcow2_alloc_cluster_at()
  iotests: Resolve test failures caused by hostname
  ...
Diffstat (limited to 'block')
-rw-r--r-- block/cow.c 2
-rw-r--r-- block/curl.c 10
-rw-r--r-- block/iscsi.c 4
-rw-r--r-- block/nbd.c 8
-rw-r--r-- block/qcow2-cluster.c 226
-rw-r--r-- block/qcow2-refcount.c 164
-rw-r--r-- block/qcow2-snapshot.c 16
-rw-r--r-- block/qcow2.c 252
-rw-r--r-- block/qcow2.h 59
-rw-r--r-- block/rbd.c 5
-rw-r--r-- block/sheepdog.c 11
11 files changed, 581 insertions, 176 deletions
diff --git a/block/cow.c b/block/cow.c
index 8d3c9f873c..a5a00eb9ca 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -103,7 +103,7 @@ static int cow_open(BlockDriverState *bs, int flags)
}
/*
- * XXX(hch): right now these functions are extremely ineffcient.
+ * XXX(hch): right now these functions are extremely inefficient.
* We should just read the whole bitmap we'll need in one go instead.
*/
static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
diff --git a/block/curl.c b/block/curl.c
index a909eca337..bf3680ba57 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -89,19 +89,17 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush,
- NULL, s);
+ qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s);
break;
case CURL_POLL_OUT:
- qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush,
- NULL, s);
+ qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s);
break;
case CURL_POLL_INOUT:
qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do,
- curl_aio_flush, NULL, s);
+ curl_aio_flush, s);
break;
case CURL_POLL_REMOVE:
- qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL);
break;
}
diff --git a/block/iscsi.c b/block/iscsi.c
index bd3ca11b2e..5222726d0f 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -108,7 +108,7 @@ iscsi_set_events(IscsiLun *iscsilun)
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read,
(iscsi_which_events(iscsi) & POLLOUT)
? iscsi_process_write : NULL,
- iscsi_process_flush, NULL, iscsilun);
+ iscsi_process_flush, iscsilun);
}
static void
@@ -682,7 +682,7 @@ static void iscsi_close(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
memset(iscsilun, 0, sizeof(IscsiLun));
}
diff --git a/block/nbd.c b/block/nbd.c
index e0af5b4725..56dbf6ef86 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -203,7 +203,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
- nbd_have_request, NULL, s);
+ nbd_have_request, s);
rc = nbd_send_request(s->sock, request);
if (rc >= 0 && iov) {
ret = qemu_co_sendv(s->sock, iov, request->len, offset);
@@ -212,7 +212,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
}
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
- nbd_have_request, NULL, s);
+ nbd_have_request, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -285,7 +285,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
* kick the reply mechanism. */
socket_set_nonblock(sock);
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
- nbd_have_request, NULL, s);
+ nbd_have_request, s);
s->sock = sock;
s->size = size;
@@ -305,7 +305,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
request.len = 0;
nbd_send_request(s->sock, &request);
- qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
closesocket(s->sock);
}
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cbd224dc46..a747a88e13 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -195,7 +195,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
l2_table = *table;
- if (old_l2_offset == 0) {
+ if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
/* if there was no old l2 table, clear the new table */
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
} else {
@@ -203,7 +203,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* if there was an old l2 table, read it from the disk */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
- ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset,
+ ret = qcow2_cache_get(bs, s->l2_table_cache,
+ old_l2_offset & L1E_OFFSET_MASK,
(void**) &old_table);
if (ret < 0) {
goto fail;
@@ -246,28 +247,44 @@ fail:
return ret;
}
+/*
+ * Checks how many clusters in a given L2 table are contiguous in the image
+ * file. As soon as one of the flags in the bitmask stop_flags changes compared
+ * to the first cluster, the search is stopped and the cluster is not counted
+ * as contiguous. (This allows it, for example, to stop at the first compressed
+ * cluster which may require a different handling)
+ */
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t start, uint64_t mask)
+ uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
{
int i;
- uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask;
+ uint64_t mask = stop_flags | L2E_OFFSET_MASK;
+ uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
if (!offset)
return 0;
- for (i = start; i < start + nb_clusters; i++)
- if (offset + (uint64_t) i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
+ for (i = start; i < start + nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+ if (offset + (uint64_t) i * cluster_size != l2_entry) {
break;
+ }
+ }
return (i - start);
}
static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
{
- int i = 0;
+ int i;
+
+ for (i = 0; i < nb_clusters; i++) {
+ int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
- while(nb_clusters-- && l2_table[i] == 0)
- i++;
+ if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ break;
+ }
+ }
return i;
}
@@ -367,11 +384,9 @@ out:
*
* on exit, *num is the number of contiguous sectors we can read.
*
- * Return 0, if the offset is found
- * Return -errno, otherwise.
- *
+ * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
+ * cases.
*/
-
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
{
@@ -407,19 +422,19 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
/* seek the the l2 offset in the l1 table */
l1_index = offset >> l1_bits;
- if (l1_index >= s->l1_size)
+ if (l1_index >= s->l1_size) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
goto out;
+ }
- l2_offset = s->l1_table[l1_index];
-
- /* seek the l2 table of the given l2 offset */
-
- if (!l2_offset)
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ if (!l2_offset) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
goto out;
+ }
/* load the l2 table in memory */
- l2_offset &= ~QCOW_OFLAG_COPIED;
ret = l2_load(bs, l2_offset, &l2_table);
if (ret < 0) {
return ret;
@@ -431,26 +446,44 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
nb_clusters = size_to_clusters(s, nb_needed << 9);
- if (!*cluster_offset) {
+ ret = qcow2_get_cluster_type(*cluster_offset);
+ switch (ret) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters can only be processed one by one */
+ c = 1;
+ *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ break;
+ case QCOW2_CLUSTER_ZERO:
+ c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
/* how many empty clusters ? */
c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
- } else {
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_NORMAL:
/* how many allocated clusters ? */
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset &= L2E_OFFSET_MASK;
+ break;
}
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- nb_available = (c * s->cluster_sectors);
+ nb_available = (c * s->cluster_sectors);
+
out:
if (nb_available > nb_needed)
nb_available = nb_needed;
*num = nb_available - index_in_cluster;
- *cluster_offset &=~QCOW_OFLAG_COPIED;
- return 0;
+ return ret;
}
/*
@@ -483,13 +516,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
return ret;
}
}
- l2_offset = s->l1_table[l1_index];
+
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
/* seek the l2 table of the given l2 offset */
- if (l2_offset & QCOW_OFLAG_COPIED) {
+ if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
/* load the l2 table in memory */
- l2_offset &= ~QCOW_OFLAG_COPIED;
ret = l2_load(bs, l2_offset, &l2_table);
if (ret < 0) {
return ret;
@@ -505,7 +538,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
if (l2_offset) {
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
}
- l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
}
/* find the cluster offset for the given disk offset */
@@ -546,15 +579,14 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
return 0;
}
+ /* Compression can't overwrite anything. Fail if the cluster was already
+ * allocated. */
cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (cluster_offset & QCOW_OFLAG_COPIED) {
+ if (cluster_offset & L2E_OFFSET_MASK) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
return 0;
}
- if (cluster_offset)
- qcow2_free_any_clusters(bs, cluster_offset, 1);
-
cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
if (cluster_offset < 0) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
@@ -663,8 +695,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
*/
if (j != 0) {
for (i = 0; i < j; i++) {
- qcow2_free_any_clusters(bs,
- be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
+ qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
}
}
@@ -682,29 +713,28 @@ err:
static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
uint64_t *l2_table, int l2_index)
{
- int i = 0;
- uint64_t cluster_offset;
+ int i;
- while (i < nb_clusters) {
- i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
- &l2_table[l2_index], i, 0);
- if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
- break;
- }
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+ int cluster_type = qcow2_get_cluster_type(l2_entry);
- i += count_contiguous_free_clusters(nb_clusters - i,
- &l2_table[l2_index + i]);
- if (i >= nb_clusters) {
+ switch(cluster_type) {
+ case QCOW2_CLUSTER_NORMAL:
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ goto out;
+ }
break;
- }
-
- cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
-
- if ((cluster_offset & QCOW_OFLAG_COPIED) ||
- (cluster_offset & QCOW_OFLAG_COMPRESSED))
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_COMPRESSED:
+ case QCOW2_CLUSTER_ZERO:
break;
+ default:
+ abort();
+ }
}
+out:
assert(i <= nb_clusters);
return i;
}
@@ -842,10 +872,14 @@ again:
* Check how many clusters are already allocated and don't need COW, and how
* many need a new allocation.
*/
- if (cluster_offset & QCOW_OFLAG_COPIED) {
+ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
+ && (cluster_offset & QCOW_OFLAG_COPIED))
+ {
/* We keep all QCOW_OFLAG_COPIED clusters */
- keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0, 0);
+ keep_clusters =
+ count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
nb_clusters -= keep_clusters;
} else {
@@ -860,7 +894,7 @@ again:
cluster_offset = 0;
}
- cluster_offset &= ~QCOW_OFLAG_COPIED;
+ cluster_offset &= L2E_OFFSET_MASK;
/* If there is something left to allocate, do that now */
*m = (QCowL2Meta) {
@@ -931,7 +965,7 @@ again:
fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
fail_put:
- if (nb_clusters > 0) {
+ if (m->nb_clusters > 0) {
QLIST_REMOVE(m, next_in_flight);
}
return ret;
@@ -1015,9 +1049,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
uint64_t old_offset;
old_offset = be64_to_cpu(l2_table[l2_index + i]);
- old_offset &= ~QCOW_OFLAG_COPIED;
-
- if (old_offset == 0) {
+ if ((old_offset & L2E_OFFSET_MASK) == 0) {
continue;
}
@@ -1070,3 +1102,75 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
return 0;
}
+
+/*
+ * This zeroes as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of zeroed
+ * clusters.
+ */
+static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
+ unsigned int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table;
+ int l2_index;
+ int ret;
+ int i;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Limit nb_clusters to one L2 table */
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t old_offset;
+
+ old_offset = be64_to_cpu(l2_table[l2_index + i]);
+
+ /* Update L2 entries */
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ if (old_offset & QCOW_OFLAG_COMPRESSED) {
+ l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ qcow2_free_any_clusters(bs, old_offset, 1);
+ } else {
+ l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
+ }
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return nb_clusters;
+}
+
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int nb_clusters;
+ int ret;
+
+ /* The zero flag is only supported by version 3 and newer */
+ if (s->qcow_version < 3) {
+ return -ENOTSUP;
+ }
+
+ /* Each L2 table is handled by its own loop iteration */
+ nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+
+ while (nb_clusters > 0) {
+ ret = zero_single_l2(bs, offset, nb_clusters);
+ if (ret < 0) {
+ return ret;
+ }
+
+ nb_clusters -= ret;
+ offset += (ret * s->cluster_size);
+ }
+
+ return 0;
+}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index f39928a6bf..812c93c5c7 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -167,7 +167,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
if (refcount_table_index < s->refcount_table_size) {
uint64_t refcount_block_offset =
- s->refcount_table[refcount_table_index];
+ s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
/* If it's already there, we're done */
if (refcount_block_offset) {
@@ -400,7 +400,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
return ret;
}
- return new_block;
+ return 0;
fail_table:
g_free(new_table);
@@ -587,6 +587,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_index;
+ uint64_t old_free_cluster_index;
int i, refcount, ret;
/* Check how many clusters there are free */
@@ -602,11 +603,16 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
}
/* And then allocate them */
+ old_free_cluster_index = s->free_cluster_index;
+ s->free_cluster_index = cluster_index + i;
+
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
if (ret < 0) {
return ret;
}
+ s->free_cluster_index = old_free_cluster_index;
+
return i;
}
@@ -673,32 +679,35 @@ void qcow2_free_clusters(BlockDriverState *bs,
}
/*
- * free_any_clusters
- *
- * free clusters according to its type: compressed or not
- *
+ * Free a cluster using its L2 entry (handles clusters of all types, e.g.
+ * normal cluster, compressed cluster, etc.)
*/
-
void qcow2_free_any_clusters(BlockDriverState *bs,
- uint64_t cluster_offset, int nb_clusters)
+ uint64_t l2_entry, int nb_clusters)
{
BDRVQcowState *s = bs->opaque;
- /* free the cluster */
-
- if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- int nb_csectors;
- nb_csectors = ((cluster_offset >> s->csize_shift) &
- s->csize_mask) + 1;
- qcow2_free_clusters(bs,
- (cluster_offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512);
- return;
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ {
+ int nb_csectors;
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ qcow2_free_clusters(bs,
+ (l2_entry & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512);
+ }
+ break;
+ case QCOW2_CLUSTER_NORMAL:
+ qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+ nb_clusters << s->cluster_bits);
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_ZERO:
+ break;
+ default:
+ abort();
}
-
- qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
-
- return;
}
@@ -758,7 +767,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l2_offset = l1_table[i];
if (l2_offset) {
old_l2_offset = l2_offset;
- l2_offset &= ~QCOW_OFLAG_COPIED;
+ l2_offset &= L1E_OFFSET_MASK;
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
(void**) &l2_table);
@@ -790,10 +799,11 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
/* compressed clusters are never modified */
refcount = 2;
} else {
+ uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
if (addend != 0) {
- refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
+ refcount = update_cluster_refcount(bs, cluster_index, addend);
} else {
- refcount = get_refcount(bs, offset >> s->cluster_bits);
+ refcount = get_refcount(bs, cluster_index);
}
if (refcount < 0) {
@@ -931,7 +941,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
int check_copied)
{
BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table, offset;
+ uint64_t *l2_table, l2_entry;
int i, l2_size, nb_csectors, refcount;
/* Read L2 table from disk */
@@ -943,54 +953,70 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
/* Do the actual checks */
for(i = 0; i < s->l2_size; i++) {
- offset = be64_to_cpu(l2_table[i]);
- if (offset != 0) {
- if (offset & QCOW_OFLAG_COMPRESSED) {
- /* Compressed clusters don't have QCOW_OFLAG_COPIED */
- if (offset & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
- "copied flag must never be set for compressed "
- "clusters\n", offset >> s->cluster_bits);
- offset &= ~QCOW_OFLAG_COPIED;
- res->corruptions++;
- }
+ l2_entry = be64_to_cpu(l2_table[i]);
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compressed "
+ "clusters\n", l2_entry >> s->cluster_bits);
+ l2_entry &= ~QCOW_OFLAG_COPIED;
+ res->corruptions++;
+ }
- /* Mark cluster as used */
- nb_csectors = ((offset >> s->csize_shift) &
- s->csize_mask) + 1;
- offset &= s->cluster_offset_mask;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- offset & ~511, nb_csectors * 512);
- } else {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- if (check_copied) {
- uint64_t entry = offset;
- offset &= ~QCOW_OFLAG_COPIED;
- refcount = get_refcount(bs, offset >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for offset %"
- PRIx64 ": %s\n", entry, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
- PRIx64 " refcount=%d\n", entry, refcount);
- res->corruptions++;
- }
- }
+ /* Mark cluster as used */
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ l2_entry &= s->cluster_offset_mask;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_entry & ~511, nb_csectors * 512);
+ break;
- /* Mark cluster as used */
- offset &= ~QCOW_OFLAG_COPIED;
- inc_refcounts(bs, res, refcount_table,refcount_table_size,
- offset, s->cluster_size);
+ case QCOW2_CLUSTER_ZERO:
+ if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+ break;
+ }
+ /* fall through */
- /* Correct offsets are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
- "properly aligned; L2 entry corrupted.\n", offset);
+ case QCOW2_CLUSTER_NORMAL:
+ {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ uint64_t offset = l2_entry & L2E_OFFSET_MASK;
+
+ if (check_copied) {
+ refcount = get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for offset %"
+ PRIx64 ": %s\n", l2_entry, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+ PRIx64 " refcount=%d\n", l2_entry, refcount);
res->corruptions++;
}
}
+
+ /* Mark cluster as used */
+ inc_refcounts(bs, res, refcount_table,refcount_table_size,
+ offset, s->cluster_size);
+
+ /* Correct offsets are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+ "properly aligned; L2 entry corrupted.\n", offset);
+ res->corruptions++;
+ }
+ break;
+ }
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ break;
+
+ default:
+ abort();
}
}
@@ -1061,7 +1087,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
}
/* Mark L2 table as used */
- l2_offset &= ~QCOW_OFLAG_COPIED;
+ l2_offset &= L1E_OFFSET_MASK;
inc_refcounts(bs, res, refcount_table, refcount_table_size,
l2_offset, s->cluster_size);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 7d3fde5a8a..42f971b590 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -48,6 +48,7 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
+ uint64_t disk_size;
} QCowSnapshotExtraData;
void qcow2_free_snapshots(BlockDriverState *bs)
@@ -117,6 +118,12 @@ int qcow2_read_snapshots(BlockDriverState *bs)
sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
}
+ if (extra_data_size >= 16) {
+ sn->disk_size = be64_to_cpu(extra.disk_size);
+ } else {
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ }
+
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
@@ -197,6 +204,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
memset(&extra, 0, sizeof(extra));
extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
+ extra.disk_size = cpu_to_be64(sn->disk_size);
id_str_size = strlen(sn->id_str);
name_size = strlen(sn->name);
@@ -330,6 +338,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
sn->id_str = g_strdup(sn_info->id_str);
sn->name = g_strdup(sn_info->name);
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
sn->vm_state_size = sn_info->vm_state_size;
sn->date_sec = sn_info->date_sec;
sn->date_nsec = sn_info->date_nsec;
@@ -426,6 +435,13 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
}
sn = &s->snapshots[snapshot_index];
+ if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
+ error_report("qcow2: Loading snapshots with different disk "
+ "size is not implemented");
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
/*
* Make sure that the current L1 table is big enough to contain the whole
* L1 table of the snapshot. If the snapshot L1 table is smaller, the
diff --git a/block/qcow2.c b/block/qcow2.c
index 70d3141dd1..ad46c03c2f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -54,6 +54,7 @@ typedef struct {
} QCowExtension;
#define QCOW2_EXT_MAGIC_END 0
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
@@ -61,7 +62,7 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
if (buf_size >= sizeof(QCowHeader) &&
be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) >= QCOW_VERSION)
+ be32_to_cpu(cow_header->version) >= 2)
return 100;
else
return 0;
@@ -76,7 +77,7 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
* return 0 upon success, non-0 otherwise
*/
static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
- uint64_t end_offset)
+ uint64_t end_offset, void **p_feature_table)
{
BDRVQcowState *s = bs->opaque;
QCowExtension ext;
@@ -134,6 +135,18 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
#endif
break;
+ case QCOW2_EXT_MAGIC_FEATURE_TABLE:
+ if (p_feature_table != NULL) {
+ void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
+ ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+ if (ret < 0) {
+ return ret;
+ }
+
+ *p_feature_table = feature_table;
+ }
+ break;
+
default:
/* unknown magic - save it in case we need to rewrite the header */
{
@@ -169,6 +182,37 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs)
}
}
+static void report_unsupported(BlockDriverState *bs, const char *fmt, ...)
+{
+ char msg[64];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "qcow2", msg);
+}
+
+static void report_unsupported_feature(BlockDriverState *bs,
+ Qcow2Feature *table, uint64_t mask)
+{
+ while (table && table->name[0] != '\0') {
+ if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
+ if (mask & (1 << table->bit)) {
+ report_unsupported(bs, "%.46s",table->name);
+ mask &= ~(1 << table->bit);
+ }
+ }
+ table++;
+ }
+
+ if (mask) {
+ report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
+ }
+}
+
static int qcow2_open(BlockDriverState *bs, int flags)
{
BDRVQcowState *s = bs->opaque;
@@ -199,14 +243,73 @@ static int qcow2_open(BlockDriverState *bs, int flags)
ret = -EINVAL;
goto fail;
}
- if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %d", header.version);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow2", version);
+ if (header.version < 2 || header.version > 3) {
+ report_unsupported(bs, "QCOW version %d", header.version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ s->qcow_version = header.version;
+
+ /* Initialise version 3 header fields */
+ if (header.version == 2) {
+ header.incompatible_features = 0;
+ header.compatible_features = 0;
+ header.autoclear_features = 0;
+ header.refcount_order = 4;
+ header.header_length = 72;
+ } else {
+ be64_to_cpus(&header.incompatible_features);
+ be64_to_cpus(&header.compatible_features);
+ be64_to_cpus(&header.autoclear_features);
+ be32_to_cpus(&header.refcount_order);
+ be32_to_cpus(&header.header_length);
+ }
+
+ if (header.header_length > sizeof(header)) {
+ s->unknown_header_fields_size = header.header_length - sizeof(header);
+ s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
+ ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+ s->unknown_header_fields_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ if (header.backing_file_offset) {
+ ext_end = header.backing_file_offset;
+ } else {
+ ext_end = 1 << header.cluster_bits;
+ }
+
+ /* Handle feature bits */
+ s->incompatible_features = header.incompatible_features;
+ s->compatible_features = header.compatible_features;
+ s->autoclear_features = header.autoclear_features;
+
+ if (s->incompatible_features != 0) {
+ void *feature_table = NULL;
+ qcow2_read_extensions(bs, header.header_length, ext_end,
+ &feature_table);
+ report_unsupported_feature(bs, feature_table,
+ s->incompatible_features);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ if (!bs->read_only && s->autoclear_features != 0) {
+ s->autoclear_features = 0;
+ qcow2_update_header(bs);
+ }
+
+ /* Check support for various header values */
+ if (header.refcount_order != 4) {
+ report_unsupported(bs, "%d bit reference counts",
+ 1 << header.refcount_order);
ret = -ENOTSUP;
goto fail;
}
+
if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS) {
ret = -EINVAL;
@@ -280,12 +383,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
QLIST_INIT(&s->cluster_allocs);
/* read qcow2 extensions */
- if (header.backing_file_offset) {
- ext_end = header.backing_file_offset;
- } else {
- ext_end = s->cluster_size;
- }
- if (qcow2_read_extensions(bs, sizeof(header), ext_end)) {
+ if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
ret = -EINVAL;
goto fail;
}
@@ -321,6 +419,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
return ret;
fail:
+ g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
@@ -449,7 +548,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
cur_nr_sectors * 512);
- if (!cluster_offset) {
+ switch (ret) {
+ case QCOW2_CLUSTER_UNALLOCATED:
if (bs->backing_hd) {
/* read from the base image */
@@ -469,7 +569,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
/* Note: in this case, no need to wait */
qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);
}
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ if (s->qcow_version < 3) {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_COMPRESSED:
/* add AIO support for compressed blocks ? */
ret = qcow2_decompress_cluster(bs, cluster_offset);
if (ret < 0) {
@@ -479,7 +589,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_from_buffer(&hd_qiov,
s->cluster_cache + index_in_cluster * 512,
512 * cur_nr_sectors);
- } else {
+ break;
+
+ case QCOW2_CLUSTER_NORMAL:
if ((cluster_offset & 511) != 0) {
ret = -EIO;
goto fail;
@@ -520,6 +632,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_from_buffer(&hd_qiov, cluster_data,
512 * cur_nr_sectors);
}
+ break;
+
+ default:
+ g_assert_not_reached();
+ ret = -EIO;
+ goto fail;
}
remaining_sectors -= cur_nr_sectors;
@@ -671,7 +789,9 @@ static void qcow2_close(BlockDriverState *bs)
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
+ g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
+
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
@@ -745,10 +865,10 @@ int qcow2_update_header(BlockDriverState *bs)
int ret;
uint64_t total_size;
uint32_t refcount_table_clusters;
+ size_t header_length;
Qcow2UnknownHeaderExtension *uext;
buf = qemu_blockalign(bs, buflen);
- memset(buf, 0, s->cluster_size);
/* Header structure */
header = (QCowHeader*) buf;
@@ -758,12 +878,14 @@ int qcow2_update_header(BlockDriverState *bs)
goto fail;
}
+ header_length = sizeof(*header) + s->unknown_header_fields_size;
total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
*header = (QCowHeader) {
+ /* Version 2 fields */
.magic = cpu_to_be32(QCOW_MAGIC),
- .version = cpu_to_be32(QCOW_VERSION),
+ .version = cpu_to_be32(s->qcow_version),
.backing_file_offset = 0,
.backing_file_size = 0,
.cluster_bits = cpu_to_be32(s->cluster_bits),
@@ -775,10 +897,42 @@ int qcow2_update_header(BlockDriverState *bs)
.refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
.nb_snapshots = cpu_to_be32(s->nb_snapshots),
.snapshots_offset = cpu_to_be64(s->snapshots_offset),
+
+ /* Version 3 fields */
+ .incompatible_features = cpu_to_be64(s->incompatible_features),
+ .compatible_features = cpu_to_be64(s->compatible_features),
+ .autoclear_features = cpu_to_be64(s->autoclear_features),
+ .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
+ .header_length = cpu_to_be32(header_length),
};
- buf += sizeof(*header);
- buflen -= sizeof(*header);
+ /* For older versions, write a shorter header */
+ switch (s->qcow_version) {
+ case 2:
+ ret = offsetof(QCowHeader, incompatible_features);
+ break;
+ case 3:
+ ret = sizeof(*header);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ memset(buf, 0, buflen);
+
+ /* Preserve any unknown field in the header */
+ if (s->unknown_header_fields_size) {
+ if (buflen < s->unknown_header_fields_size) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
+ buf += s->unknown_header_fields_size;
+ buflen -= s->unknown_header_fields_size;
+ }
/* Backing file format header extension */
if (*bs->backing_format) {
@@ -793,6 +947,19 @@ int qcow2_update_header(BlockDriverState *bs)
buflen -= ret;
}
+ /* Feature table */
+ Qcow2Feature features[] = {
+ /* no feature defined yet */
+ };
+
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
+
/* Keep unknown header extensions */
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
@@ -910,7 +1077,7 @@ static int preallocate(BlockDriverState *bs)
static int qcow2_create2(const char *filename, int64_t total_size,
const char *backing_file, const char *backing_format,
int flags, size_t cluster_size, int prealloc,
- QEMUOptionParameter *options)
+ QEMUOptionParameter *options, int version)
{
/* Calculate cluster_bits */
int cluster_bits;
@@ -954,13 +1121,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write the header */
memset(&header, 0, sizeof(header));
header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(QCOW_VERSION);
+ header.version = cpu_to_be32(version);
header.cluster_bits = cpu_to_be32(cluster_bits);
header.size = cpu_to_be64(0);
header.l1_table_offset = cpu_to_be64(0);
header.l1_size = cpu_to_be32(0);
header.refcount_table_offset = cpu_to_be64(cluster_size);
header.refcount_table_clusters = cpu_to_be32(1);
+ header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
+ header.header_length = cpu_to_be32(sizeof(header));
if (flags & BLOCK_FLAG_ENCRYPT) {
header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
@@ -1042,6 +1211,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
int flags = 0;
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
int prealloc = 0;
+ int version = 2;
/* Read out options */
while (options && options->name) {
@@ -1067,6 +1237,16 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
options->value.s);
return -EINVAL;
}
+ } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
+ if (!options->value.s || !strcmp(options->value.s, "0.10")) {
+ version = 2;
+ } else if (!strcmp(options->value.s, "1.1")) {
+ version = 3;
+ } else {
+ fprintf(stderr, "Invalid compatibility level: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
}
options++;
}
@@ -1078,7 +1258,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
}
return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size, prealloc, options);
+ cluster_size, prealloc, options, version);
}
static int qcow2_make_empty(BlockDriverState *bs)
@@ -1101,6 +1281,26 @@ static int qcow2_make_empty(BlockDriverState *bs)
return 0;
}
+static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{ /* Zero-write hook: returns -ENOTSUP for misaligned requests, else the result of qcow2_zero_clusters() */
+ int ret;
+ BDRVQcowState *s = bs->opaque;
+
+ /* Start and length must both be multiples of s->cluster_sectors; misaligned zero writes get -ENOTSUP (presumably so the caller emulates them -- confirm) */
+ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
+ return -ENOTSUP;
+ }
+
+ /* Whatever is left can use real zero clusters; s->lock is held across the call */
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors); /* sector number is shifted into the offset argument; the length stays in sectors */
+ qemu_co_mutex_unlock(&s->lock);
+
+ return ret;
+}
+
static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
@@ -1330,6 +1530,11 @@ static QEMUOptionParameter qcow2_create_options[] = {
.help = "Virtual disk size"
},
{
+ .name = BLOCK_OPT_COMPAT_LEVEL,
+ .type = OPT_STRING,
+ .help = "Compatibility level (0.10 or 1.1)"
+ },
+ {
.name = BLOCK_OPT_BACKING_FILE,
.type = OPT_STRING,
.help = "File name of a base image"
@@ -1373,6 +1578,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+ .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
diff --git a/block/qcow2.h b/block/qcow2.h
index e4ac366cfc..93567f6451 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -33,7 +33,6 @@
//#define DEBUG_EXT
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 2
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES 1
@@ -44,6 +43,8 @@
#define QCOW_OFLAG_COPIED (1LL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
+/* The cluster reads as all zeros */
+#define QCOW_OFLAG_ZERO (1LL << 0)
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
@@ -71,6 +72,14 @@ typedef struct QCowHeader {
uint32_t refcount_table_clusters;
uint32_t nb_snapshots;
uint64_t snapshots_offset;
+
+ /* The following fields are only valid for version >= 3 */
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ uint32_t refcount_order;
+ uint32_t header_length;
} QCowHeader;
typedef struct QCowSnapshot {
@@ -78,6 +87,7 @@ typedef struct QCowSnapshot {
uint32_t l1_size;
char *id_str;
char *name;
+ uint64_t disk_size;
uint64_t vm_state_size;
uint32_t date_sec;
uint32_t date_nsec;
@@ -94,6 +104,18 @@ typedef struct Qcow2UnknownHeaderExtension {
uint8_t data[];
} Qcow2UnknownHeaderExtension;
+enum { /* Feature type codes; the names parallel the incompatible/compatible/autoclear feature fields of QCowHeader */
+ QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
+ QCOW2_FEAT_TYPE_COMPATIBLE = 1,
+ QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
+}; /* NOTE(review): presumably the values stored in Qcow2Feature.type -- confirm */
+
+typedef struct Qcow2Feature { /* Element type of the array written under QCOW2_EXT_MAGIC_FEATURE_TABLE by qcow2_update_header() */
+ uint8_t type; /* presumably one of QCOW2_FEAT_TYPE_* -- confirm */
+ uint8_t bit; /* presumably the bit number within that feature bitmask -- confirm */
+ char name[46]; /* fixed-size name field */
+} QEMU_PACKED Qcow2Feature; /* members sum to 1 + 1 + 46 = 48 bytes */
+
typedef struct BDRVQcowState {
int cluster_bits;
int cluster_size;
@@ -134,6 +156,14 @@ typedef struct BDRVQcowState {
QCowSnapshot *snapshots;
int flags;
+ int qcow_version;
+
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ size_t unknown_header_fields_size;
+ void* unknown_header_fields;
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
} BDRVQcowState;
@@ -164,6 +194,19 @@ typedef struct QCowL2Meta
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
+enum { /* Cluster types returned by qcow2_get_cluster_type() and switched on in qcow2_co_readv() */
+ QCOW2_CLUSTER_UNALLOCATED, /* no L2E_OFFSET_MASK bits set; qcow2_co_readv() reads the backing file or zero-fills */
+ QCOW2_CLUSTER_NORMAL, /* none of the other three cases applies */
+ QCOW2_CLUSTER_COMPRESSED, /* QCOW_OFLAG_COMPRESSED is set in the L2 entry */
+ QCOW2_CLUSTER_ZERO /* QCOW_OFLAG_ZERO is set; qcow2_co_readv() fails with -EIO when qcow_version < 3 */
+};
+
+#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL /* selects bits 8..55 of an L1 entry */
+#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL /* selects bits 8..55 of an L2 entry */
+#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL /* bits 0..61: everything below QCOW_OFLAG_COMPRESSED (bit 62) */
+
+#define REFT_OFFSET_MASK 0xffffffffffffff00ULL /* bits 8..63; presumably applied to refcount table entries -- confirm */
+
static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
{
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
@@ -181,6 +224,19 @@ static inline int64_t align_offset(int64_t offset, int n)
return offset;
}
+static inline int qcow2_get_cluster_type(uint64_t l2_entry) /* Classifies an L2 entry as one of the QCOW2_CLUSTER_* values */
+{
+ if (l2_entry & QCOW_OFLAG_COMPRESSED) { /* tested first: L2E_COMPRESSED_OFFSET_SIZE_MASK covers bit 0, which is also QCOW_OFLAG_ZERO */
+ return QCOW2_CLUSTER_COMPRESSED;
+ } else if (l2_entry & QCOW_OFLAG_ZERO) { /* zero flag is tested before the offset bits, so it applies whether or not an offset is present */
+ return QCOW2_CLUSTER_ZERO;
+ } else if (!(l2_entry & L2E_OFFSET_MASK)) { /* neither flag and no offset bits set */
+ return QCOW2_CLUSTER_UNALLOCATED;
+ } else { /* offset bits set, neither the compressed nor the zero flag */
+ return QCOW2_CLUSTER_NORMAL;
+ }
+}
+
// FIXME Need qcow2_ prefix to global functions
@@ -227,6 +283,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
diff --git a/block/rbd.c b/block/rbd.c
index 46a8579018..6cd84488e4 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -504,7 +504,7 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
- NULL, qemu_rbd_aio_flush_cb, NULL, s);
+ NULL, qemu_rbd_aio_flush_cb, s);
return 0;
@@ -525,8 +525,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
close(s->fds[0]);
close(s->fds[1]);
- qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL , NULL, NULL, NULL,
- NULL);
+ qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL);
rbd_close(s->image);
rados_ioctx_destroy(s->io_ctx);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 3eaf625e98..0ed6b193c9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -799,8 +799,7 @@ static int get_sheep_fd(BDRVSheepdogState *s)
return -1;
}
- qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request,
- NULL, s);
+ qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
return fd;
}
@@ -973,7 +972,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request,
- aio_flush_request, NULL, s);
+ aio_flush_request, s);
socket_set_cork(s->fd, 1);
/* send a header */
@@ -995,7 +994,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
socket_set_cork(s->fd, 0);
qemu_aio_set_fd_handler(s->fd, co_read_response, NULL,
- aio_flush_request, NULL, s);
+ aio_flush_request, s);
qemu_co_mutex_unlock(&s->lock);
return 0;
@@ -1135,7 +1134,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
g_free(buf);
return 0;
out:
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1349,7 +1348,7 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
if (s->cache_enabled) {
closesocket(s->flush_fd);