diff options
Diffstat (limited to 'block/quorum.c')
-rw-r--r-- | block/quorum.c | 415 |
1 files changed, 297 insertions, 118 deletions
diff --git a/block/quorum.c b/block/quorum.c index eb526cc0f1..db8fe891c4 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -15,8 +15,11 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/module.h" #include "qemu/option.h" +#include "qemu/memalign.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "block/qdict.h" #include "qapi/error.h" #include "qapi/qapi-events-block.h" @@ -28,6 +31,8 @@ #define HASH_LENGTH 32 +#define INDEXSTR_LEN 32 + #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold" #define QUORUM_OPT_BLKVERIFY "blkverify" #define QUORUM_OPT_REWRITE "rewrite-corrupted" @@ -156,11 +161,10 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) return a->l == b->l; } -static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, - QEMUIOVector *qiov, - uint64_t offset, - uint64_t bytes, - int flags) +static QuorumAIOCB *coroutine_fn quorum_aio_get(BlockDriverState *bs, + QEMUIOVector *qiov, + uint64_t offset, uint64_t bytes, + int flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); @@ -198,11 +202,11 @@ static void quorum_report_bad(QuorumOpType type, uint64_t offset, msg = strerror(-ret); } - qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector, + qapi_event_send_quorum_report_bad(type, msg, node_name, start_sector, end_sector - start_sector); } -static void quorum_report_failure(QuorumAIOCB *acb) +static void GRAPH_RDLOCK quorum_report_failure(QuorumAIOCB *acb) { const char *reference = bdrv_get_device_or_node_name(acb->bs); int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; @@ -215,7 +219,7 @@ static void quorum_report_failure(QuorumAIOCB *acb) static int quorum_vote_error(QuorumAIOCB *acb); -static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) +static bool GRAPH_RDLOCK quorum_has_too_much_io_failed(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; @@ -228,8 +232,6 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) return false; } -static int read_fifo_child(QuorumAIOCB *acb); - static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) { int i; @@ -268,7 +270,11 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, } } -static void quorum_rewrite_entry(void *opaque) +/* + * This function can count as GRAPH_RDLOCK because read_quorum_children() holds + * the graph lock and keeps it until this coroutine has terminated. + */ +static void coroutine_fn GRAPH_RDLOCK quorum_rewrite_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -288,8 +294,8 @@ static void quorum_rewrite_entry(void *opaque) } } -static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb, - QuorumVoteValue *value) +static bool coroutine_fn GRAPH_RDLOCK +quorum_rewrite_bad_versions(QuorumAIOCB *acb, QuorumVoteValue *value) { QuorumVoteVersion *version; QuorumVoteItem *item; @@ -437,23 +443,7 @@ static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) return true; } -static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb, - const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ", - acb->offset, acb->bytes); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - va_end(ap); - exit(1); -} - -static bool quorum_compare(QuorumAIOCB *acb, - QEMUIOVector *a, - QEMUIOVector *b) +static bool quorum_compare(QuorumAIOCB *acb, QEMUIOVector *a, QEMUIOVector *b) { BDRVQuorumState *s = acb->bs->opaque; ssize_t offset; @@ -462,8 +452,10 @@ static bool quorum_compare(QuorumAIOCB *acb, if (s->is_blkverify) { offset = qemu_iovec_compare(a, b); if (offset != -1) { - quorum_err(acb, "contents mismatch at offset %" PRIu64, - acb->offset + offset); + fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 + " contents mismatch at offset %" PRIu64 "\n", + acb->offset, acb->bytes, acb->offset + offset); + exit(1); } return true; } @@ -503,7 +495,7 @@ static int quorum_vote_error(QuorumAIOCB *acb) return ret; } -static void quorum_vote(QuorumAIOCB *acb) +static void coroutine_fn GRAPH_RDLOCK quorum_vote(QuorumAIOCB *acb) { bool quorum = true; int i, j, ret; @@ -583,7 +575,11 @@ free_exit: quorum_free_vote_list(&acb->votes); } -static void read_quorum_children_entry(void *opaque) +/* + * This function can count as GRAPH_RDLOCK because read_quorum_children() holds + * the graph lock and keeps it until this coroutine has terminated. + */ +static void coroutine_fn GRAPH_RDLOCK read_quorum_children_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -611,7 +607,7 @@ static void read_quorum_children_entry(void *opaque) } } -static int read_quorum_children(QuorumAIOCB *acb) +static int coroutine_fn GRAPH_RDLOCK read_quorum_children(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; int i; @@ -652,7 +648,7 @@ static int read_quorum_children(QuorumAIOCB *acb) return acb->vote_ret; } -static int read_fifo_child(QuorumAIOCB *acb) +static int coroutine_fn GRAPH_RDLOCK read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; int n, ret; @@ -673,8 +669,9 @@ static int read_fifo_child(QuorumAIOCB *acb) return ret; } -static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int coroutine_fn GRAPH_RDLOCK +quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -693,7 +690,11 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, return ret; } -static void write_quorum_entry(void *opaque) +/* + * This function can count as GRAPH_RDLOCK because quorum_co_pwritev() holds the + * graph lock and keeps it until this coroutine has terminated. + */ +static void coroutine_fn GRAPH_RDLOCK write_quorum_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -702,8 +703,13 @@ static void write_quorum_entry(void *opaque) QuorumChildRequest *sacb = &acb->qcrs[i]; sacb->bs = s->children[i]->bs; - sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, - acb->qiov, acb->flags); + if (acb->flags & BDRV_REQ_ZERO_WRITE) { + sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset, + acb->bytes, acb->flags); + } else { + sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, + acb->qiov, acb->flags); + } if (sacb->ret == 0) { acb->success_count++; } else { @@ -719,8 +725,9 @@ static void write_quorum_entry(void *opaque) } } -static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int coroutine_fn GRAPH_RDLOCK +quorum_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -749,19 +756,28 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, return ret; } -static int64_t quorum_getlength(BlockDriverState *bs) +static int coroutine_fn GRAPH_RDLOCK +quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + return quorum_co_pwritev(bs, offset, bytes, NULL, + flags | BDRV_REQ_ZERO_WRITE); +} + +static int64_t coroutine_fn GRAPH_RDLOCK +quorum_co_getlength(BlockDriverState *bs) { BDRVQuorumState *s = bs->opaque; int64_t result; int i; /* check that all file have the same length */ - result = bdrv_getlength(s->children[0]->bs); + result = bdrv_co_getlength(s->children[0]->bs); if (result < 0) { return result; } for (i = 1; i < s->num_children; i++) { - int64_t value = bdrv_getlength(s->children[i]->bs); + int64_t value = bdrv_co_getlength(s->children[i]->bs); if (value < 0) { return value; } @@ -773,7 +789,7 @@ static int64_t quorum_getlength(BlockDriverState *bs) return result; } -static coroutine_fn int quorum_co_flush(BlockDriverState *bs) +static coroutine_fn GRAPH_RDLOCK int quorum_co_flush(BlockDriverState *bs) { BDRVQuorumState *s = bs->opaque; QuorumVoteVersion *winner = NULL; @@ -809,17 +825,53 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) return result; } -static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) +static bool GRAPH_RDLOCK +quorum_recurse_can_replace(BlockDriverState *bs, BlockDriverState *to_replace) { BDRVQuorumState *s = bs->opaque; int i; for (i = 0; i < s->num_children; i++) { - bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs, - candidate); - if (perm) { - return true; + /* + * We have no idea whether our children show the same data as + * this node (@bs). It is actually highly likely that + * @to_replace does not, because replacing a broken child is + * one of the main use cases here. + * + * We do know that the new BDS will match @bs, so replacing + * any of our children by it will be safe. It cannot change + * the data this quorum node presents to its parents. + * + * However, replacing @to_replace by @bs in any of our + * children's chains may change visible data somewhere in + * there. We therefore cannot recurse down those chains with + * bdrv_recurse_can_replace(). + * (More formally, bdrv_recurse_can_replace() requires that + * @to_replace will be replaced by something matching the @bs + * passed to it. We cannot guarantee that.) + * + * Thus, we can only check whether any of our immediate + * children matches @to_replace. + * + * (In the future, we might add a function to recurse down a + * chain that checks that nothing there cares about a change + * in data from the respective child in question. For + * example, most filters do not care when their child's data + * suddenly changes, as long as their parents do not care.) + */ + if (s->children[i]->bs == to_replace) { + /* + * We now have to ensure that there is no other parent + * that cares about replacing this child by a node with + * potentially different data. + * We do so by checking whether there are any other parents + * at all, which is stricter than necessary, but also very + * simple. (We may decide to implement something more + * complex and permissive when there is an actual need for + * it.) + */ + return QLIST_FIRST(&to_replace->parents) == s->children[i] && + QLIST_NEXT(s->children[i], next_parent) == NULL; } } @@ -831,7 +883,7 @@ static int quorum_valid_threshold(int threshold, int num_children, Error **errp) if (threshold < 1) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "vote-threshold", "value >= 1"); + "vote-threshold", "a value >= 1"); return -ERANGE; } @@ -871,11 +923,25 @@ static QemuOptsList quorum_runtime_opts = { }, }; +static void quorum_refresh_flags(BlockDriverState *bs) +{ + BDRVQuorumState *s = bs->opaque; + int i; + + bs->supported_zero_flags = + BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + + for (i = 0; i < s->num_children; i++) { + bs->supported_zero_flags &= s->children[i]->bs->supported_zero_flags; + } + + bs->supported_zero_flags |= BDRV_REQ_WRITE_UNCHANGED; +} + static int quorum_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVQuorumState *s = bs->opaque; - Error *local_err = NULL; QemuOpts *opts = NULL; const char *pattern_str; bool *opened; @@ -887,27 +953,25 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, /* count how many different children are present */ s->num_children = qdict_array_entries(options, "children."); if (s->num_children < 0) { - error_setg(&local_err, "Option children is not a valid array"); + error_setg(errp, "Option children is not a valid array"); ret = -EINVAL; goto exit; } if (s->num_children < 1) { - error_setg(&local_err, - "Number of provided children must be 1 or more"); + error_setg(errp, "Number of provided children must be 1 or more"); ret = -EINVAL; goto exit; } opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { + if (!qemu_opts_absorb_qdict(opts, options, errp)) { ret = -EINVAL; goto exit; } s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0); /* and validate it against s->num_children */ - ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err); + ret = quorum_valid_threshold(s->threshold, s->num_children, errp); if (ret < 0) { goto exit; } @@ -920,25 +984,24 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, -EINVAL, NULL); } if (ret < 0) { - error_setg(&local_err, "Please set read-pattern as fifo or quorum"); + error_setg(errp, "Please set read-pattern as fifo or quorum"); goto exit; } s->read_pattern = ret; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - /* is the driver in blkverify mode */ - if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) && - s->num_children == 2 && s->threshold == 2) { - s->is_blkverify = true; - } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) { - fprintf(stderr, "blkverify mode is set by setting blkverify=on " - "and using two files with vote_threshold=2\n"); + s->is_blkverify = qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false); + if (s->is_blkverify && (s->num_children != 2 || s->threshold != 2)) { + error_setg(errp, "blkverify=on can only be set if there are " + "exactly two files and vote-threshold is 2"); + ret = -EINVAL; + goto exit; } s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false); if (s->rewrite_corrupted && s->is_blkverify) { - error_setg(&local_err, + error_setg(errp, "rewrite-corrupted=on cannot be used with blkverify=on"); ret = -EINVAL; goto exit; @@ -950,13 +1013,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, opened = g_new0(bool, s->num_children); for (i = 0; i < s->num_children; i++) { - char indexstr[32]; - ret = snprintf(indexstr, 32, "children.%d", i); - assert(ret < 32); + char indexstr[INDEXSTR_LEN]; + ret = snprintf(indexstr, INDEXSTR_LEN, "children.%d", i); + assert(ret < INDEXSTR_LEN); s->children[i] = bdrv_open_child(NULL, options, indexstr, bs, - &child_format, false, &local_err); - if (local_err) { + &child_of_bds, BDRV_CHILD_DATA, false, + errp); + if (!s->children[i]) { ret = -EINVAL; goto close_exit; } @@ -966,24 +1030,25 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->next_child_index = s->num_children; bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + quorum_refresh_flags(bs); g_free(opened); goto exit; close_exit: /* cleanup on error */ + bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { if (!opened[i]) { continue; } bdrv_unref_child(bs, s->children[i]); } + bdrv_graph_wrunlock(); g_free(s->children); g_free(opened); exit: qemu_opts_del(opts); - /* propagate error */ - error_propagate(errp, local_err); return ret; } @@ -992,21 +1057,28 @@ static void quorum_close(BlockDriverState *bs) BDRVQuorumState *s = bs->opaque; int i; + bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { bdrv_unref_child(bs, s->children[i]); } + bdrv_graph_wrunlock(); g_free(s->children); } -static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, - Error **errp) +static void GRAPH_WRLOCK +quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp) { BDRVQuorumState *s = bs->opaque; BdrvChild *child; - char indexstr[32]; + char indexstr[INDEXSTR_LEN]; int ret; + if (s->is_blkverify) { + error_setg(errp, "Cannot add a child to a quorum in blkverify mode"); + return; + } + assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); if (s->num_children == INT_MAX / sizeof(BdrvChild *) || s->next_child_index == UINT_MAX) { @@ -1014,35 +1086,32 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, return; } - ret = snprintf(indexstr, 32, "children.%u", s->next_child_index); - if (ret < 0 || ret >= 32) { + ret = snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index); + if (ret < 0 || ret >= INDEXSTR_LEN) { error_setg(errp, "cannot generate child name"); return; } s->next_child_index++; - bdrv_drained_begin(bs); - /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + child = bdrv_attach_child(bs, child_bs, indexstr, &child_of_bds, + BDRV_CHILD_DATA, errp); if (child == NULL) { s->next_child_index--; - bdrv_unref(child_bs); - goto out; + return; } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; - -out: - bdrv_drained_end(bs); + quorum_refresh_flags(bs); } -static void quorum_del_child(BlockDriverState *bs, BdrvChild *child, - Error **errp) +static void GRAPH_WRLOCK +quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp) { BDRVQuorumState *s = bs->opaque; + char indexstr[INDEXSTR_LEN]; int i; for (i = 0; i < s->num_children; i++) { @@ -1061,47 +1130,153 @@ static void quorum_del_child(BlockDriverState *bs, BdrvChild *child, return; } - bdrv_drained_begin(bs); + /* We know now that num_children > threshold, so blkverify must be false */ + assert(!s->is_blkverify); + + snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index - 1); + if (!strncmp(child->name, indexstr, INDEXSTR_LEN)) { + s->next_child_index--; + } /* We can safely remove this child now */ memmove(&s->children[i], &s->children[i + 1], (s->num_children - i - 1) * sizeof(BdrvChild *)); s->children = g_renew(BdrvChild *, s->children, --s->num_children); + bdrv_unref_child(bs, child); - bdrv_drained_end(bs); + quorum_refresh_flags(bs); } -static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) +static void quorum_gather_child_options(BlockDriverState *bs, QDict *target, + bool backing_overridden) { BDRVQuorumState *s = bs->opaque; - QDict *opts; - QList *children; + QList *children_list; int i; - for (i = 0; i < s->num_children; i++) { - bdrv_refresh_filename(s->children[i]->bs); - if (!s->children[i]->bs->full_open_options) { - return; - } - } + /* + * The generic implementation for gathering child options in + * bdrv_refresh_filename() would use the names of the children + * as specified for bdrv_open_child() or bdrv_attach_child(), + * which is "children.%u" with %u being a value + * (s->next_child_index) that is incremented each time a new child + * is added (and never decremented). Since children can be + * deleted at runtime, there may be gaps in that enumeration. + * When creating a new quorum BDS and specifying the children for + * it through runtime options, the enumeration used there may not + * have any gaps, though. + * + * Therefore, we have to create a new gap-less enumeration here + * (which we can achieve by simply putting all of the children's + * full_open_options into a QList). + * + * XXX: Note that there are issues with the current child option + * structure quorum uses (such as the fact that children do + * not really have unique permanent names). Therefore, this + * is going to have to change in the future and ideally we + * want quorum to be covered by the generic implementation. + */ + + children_list = qlist_new(); + qdict_put(target, "children", children_list); - children = qlist_new(); for (i = 0; i < s->num_children; i++) { - qlist_append(children, + qlist_append(children_list, qobject_ref(s->children[i]->bs->full_open_options)); } +} + +static char *quorum_dirname(BlockDriverState *bs, Error **errp) +{ + /* In general, there are multiple BDSs with different dirnames below this + * one; so there is no unique dirname we could return (unless all are equal + * by chance, or there is only one). Therefore, to be consistent, just + * always return NULL. */ + error_setg(errp, "Cannot generate a base directory for quorum nodes"); + return NULL; +} + +static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVQuorumState *s = bs->opaque; + + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + if (s->rewrite_corrupted) { + *nperm |= BLK_PERM_WRITE; + } + + /* + * We cannot share RESIZE or WRITE, as this would make the + * children differ from each other. + */ + *nshared = (shared & (BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE_UNCHANGED)) + | DEFAULT_PERM_UNCHANGED; +} - opts = qdict_new(); - qdict_put_str(opts, "driver", "quorum"); - qdict_put_int(opts, QUORUM_OPT_VOTE_THRESHOLD, s->threshold); - qdict_put_bool(opts, QUORUM_OPT_BLKVERIFY, s->is_blkverify); - qdict_put_bool(opts, QUORUM_OPT_REWRITE, s->rewrite_corrupted); - qdict_put(opts, "children", children); +/* + * Each one of the children can report different status flags even + * when they contain the same data, so what this function does is + * return BDRV_BLOCK_ZERO if *all* children agree that a certain + * region contains zeroes, and BDRV_BLOCK_DATA otherwise. + */ +static int coroutine_fn GRAPH_RDLOCK +quorum_co_block_status(BlockDriverState *bs, bool want_zero, + int64_t offset, int64_t count, + int64_t *pnum, int64_t *map, BlockDriverState **file) +{ + BDRVQuorumState *s = bs->opaque; + int i, ret; + int64_t pnum_zero = count; + int64_t pnum_data = 0; - bs->full_open_options = opts; + for (i = 0; i < s->num_children; i++) { + int64_t bytes; + ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, + want_zero, offset, count, + &bytes, NULL, NULL, NULL); + if (ret < 0) { + quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, + s->children[i]->bs->node_name, ret); + pnum_data = count; + break; + } + /* + * Even if all children agree about whether there are zeroes + * or not at @offset they might disagree on the size, so use + * the smallest when reporting BDRV_BLOCK_ZERO and the largest + * when reporting BDRV_BLOCK_DATA. + */ + if (ret & BDRV_BLOCK_ZERO) { + pnum_zero = MIN(pnum_zero, bytes); + } else { + pnum_data = MAX(pnum_data, bytes); + } + } + + if (pnum_data) { + *pnum = pnum_data; + return BDRV_BLOCK_DATA; + } else { + *pnum = pnum_zero; + return BDRV_BLOCK_ZERO; + } } +static const char *const quorum_strong_runtime_opts[] = { + QUORUM_OPT_VOTE_THRESHOLD, + QUORUM_OPT_BLKVERIFY, + QUORUM_OPT_REWRITE, + QUORUM_OPT_READ_PATTERN, + + NULL +}; + static BlockDriver bdrv_quorum = { .format_name = "quorum", @@ -1109,22 +1284,26 @@ static BlockDriver bdrv_quorum = { .bdrv_open = quorum_open, .bdrv_close = quorum_close, - .bdrv_refresh_filename = quorum_refresh_filename, + .bdrv_gather_child_options = quorum_gather_child_options, + .bdrv_dirname = quorum_dirname, + .bdrv_co_block_status = quorum_co_block_status, - .bdrv_co_flush_to_disk = quorum_co_flush, + .bdrv_co_flush = quorum_co_flush, - .bdrv_getlength = quorum_getlength, + .bdrv_co_getlength = quorum_co_getlength, .bdrv_co_preadv = quorum_co_preadv, .bdrv_co_pwritev = quorum_co_pwritev, + .bdrv_co_pwrite_zeroes = quorum_co_pwrite_zeroes, .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, - .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_child_perm = quorum_child_perm, + + .bdrv_recurse_can_replace = quorum_recurse_can_replace, - .is_filter = true, - .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, + .strong_runtime_opts = quorum_strong_runtime_opts, }; static void bdrv_quorum_init(void) |