aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/quorum.c97
-rw-r--r--qapi/block-core.json5
-rwxr-xr-xtests/qemu-iotests/08115
-rw-r--r--tests/qemu-iotests/081.out10
4 files changed, 119 insertions, 8 deletions
diff --git a/block/quorum.c b/block/quorum.c
index 86802d306b..d5ee9c0059 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -23,6 +23,7 @@
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
@@ -70,6 +71,9 @@ typedef struct BDRVQuorumState {
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+ * block if Quorum is reached.
+ */
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
@@ -105,13 +109,17 @@ struct QuorumAIOCB {
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
+ int rewrite_count; /* number of replica to rewrite: count down to
+ * zero once writes are fired
+ */
+
QuorumVotes votes;
bool is_read;
int vote_ret;
};
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
@@ -183,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
+ acb->rewrite_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
@@ -232,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+ QuorumAIOCB *acb = opaque;
+
+ /* one less rewrite to do */
+ acb->rewrite_count--;
+
+ /* wait until all rewrite callbacks have completed */
+ if (acb->rewrite_count) {
+ return;
+ }
+
+ quorum_aio_finalize(acb);
+}
+
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
+ bool rewrite = false;
sacb->ret = ret;
acb->count++;
@@ -253,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret)
/* Do the vote on read */
if (acb->is_read) {
- quorum_vote(acb);
+ rewrite = quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
- quorum_aio_finalize(acb);
+ /* if no rewrite is done the code will finish right away */
+ if (!rewrite) {
+ quorum_aio_finalize(acb);
+ }
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -278,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
}
}
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+ QuorumVoteValue *value)
+{
+ QuorumVoteVersion *version;
+ QuorumVoteItem *item;
+ int count = 0;
+
+ /* first count the number of bad versions: done first to avoid concurrency
+ * issues.
+ */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ count++;
+ }
+ }
+
+ /* quorum_rewrite_aio_cb will count down this to zero */
+ acb->rewrite_count = count;
+
+ /* now fire the correcting rewrites */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ }
+ }
+
+ /* return true if any rewrite is done else false */
+ return count;
+}
+
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
@@ -468,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
+ bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return;
+ return false;
}
/* get the index of the first successful read */
@@ -505,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return;
+ return false;
}
/* compute hashes for each successful read, also store indexes */
@@ -538,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb)
/* some versions are bad print them */
quorum_report_bad_versions(s, acb, &winner->value);
+ /* corruption correction is enabled */
+ if (s->rewrite_corrupted) {
+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ }
+
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
+ return rewrite;
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -705,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
+ {
+ .name = QUORUM_OPT_REWRITE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Rewrite corrupted block on read quorum",
+ },
{ /* end of list */ }
},
};
@@ -766,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
"and using two files with vote_threshold=2\n");
}
+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+ if (s->rewrite_corrupted && s->is_blkverify) {
+ error_setg(&local_err,
+ "rewrite-corrupted=on cannot be used with blkverify=on");
+ ret = -EINVAL;
+ goto exit;
+ }
+
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index af6b436540..de31f9fd54 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1329,12 +1329,15 @@
#
# @vote-threshold: the vote limit under which a read will fail
#
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+# (Since 2.1)
+#
# Since: 2.0
##
{ 'type': 'BlockdevOptionsQuorum',
'data': { '*blkverify': 'bool',
'children': [ 'BlockdevRef' ],
- 'vote-threshold': 'int' } }
+ 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
##
# @BlockdevOptions
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00cc8..7ae4be2053 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
EOF
echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
echo "== breaking quorum =="
$QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
echo
echo "== checking that quorum is broken =="
$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
-
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 2241cec148..073515ea50 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
== breaking quorum ==
wrote 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== checking that quorum is broken ==
qemu-io: can't open: Could not read image for determining its format: Input/output error