aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-02-22 16:55:41 +0000
committerPeter Maydell <peter.maydell@linaro.org>2016-02-22 16:55:41 +0000
commit8eb779e4223a18db9838a49ece1bc72cfdfb7761 (patch)
tree8b3bac97d620379d579e02cff3bb845f79ccdda3
parenta02dabe10adc4ae3be54f7413f60dcaa67028389 (diff)
parentfe243e4881bc9e09767dba05f15acb016cfa7a52 (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches # gpg: Signature made Mon 22 Feb 2016 15:59:25 GMT using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: (34 commits) qemu-iotests: 140: make description slightly more verbose qemu-iotests: 140: don't use IDE device qemu-iotests: 067: ignore QMP events blockdev: unset inappropriate flags when changing medium MAINTAINERS: Add myself as maintainer of the throttling code docs: Document the throttling infrastructure qapi: Correct the name of the iops_rd parameter qemu-iotests: Extend iotest 093 to test bursts throttle: Test throttle_compute_wait() during bursts throttle: Check that burst_level leaks correctly qapi: Add burst length fields to BlockDeviceInfo qapi: Add burst length parameters to block_set_io_throttle throttle: Add command-line settings to define the burst periods throttle: Add support for burst periods throttle: Use throttle_config_init() to initialize ThrottleConfig throttle: Merge all functions that check the configuration into one throttle: Set always an average value when setting a maximum value throttle: Make throttle_is_valid() set errp throttle: Make throttle_max_is_missing_limit() set errp throttle: Make throttle_conflicting() set errp ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS9
-rw-r--r--block.c4
-rw-r--r--block/qapi.c20
-rw-r--r--block/quorum.c3
-rw-r--r--blockdev.c109
-rw-r--r--docs/specs/qcow2.txt221
-rw-r--r--docs/throttle.txt252
-rw-r--r--hmp.c12
-rw-r--r--include/qemu/throttle.h55
-rw-r--r--migration/block.c7
-rw-r--r--qapi/block-core.json92
-rw-r--r--qemu-img-cmds.hx44
-rw-r--r--qemu-img.c491
-rw-r--r--qemu-img.texi14
-rw-r--r--qemu-io.c115
-rw-r--r--qemu-nbd.c104
-rw-r--r--qemu-nbd.texi7
-rw-r--r--qmp-commands.hx25
-rwxr-xr-xtests/qemu-iotests/06711
-rw-r--r--tests/qemu-iotests/067.out144
-rwxr-xr-xtests/qemu-iotests/09365
-rw-r--r--tests/qemu-iotests/093.out4
-rwxr-xr-xtests/qemu-iotests/1408
-rw-r--r--tests/qemu-iotests/140.out1
-rwxr-xr-xtests/qemu-iotests/14552
-rw-r--r--tests/qemu-iotests/145.out5
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/test-throttle.c88
-rw-r--r--util/throttle.c132
29 files changed, 1663 insertions, 432 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 9adeda355e..606d9c08b5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1283,6 +1283,15 @@ S: Maintained
F: include/qemu/sockets.h
F: util/qemu-sockets.c
+Throttling infrastructure
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/throttle-groups.c
+F: include/block/throttle-groups.h
+F: include/qemu/throttle.h
+F: util/throttle.c
+L: qemu-block@nongnu.org
+
Usermode Emulation
------------------
Overall
diff --git a/block.c b/block.c
index efc3c43f89..ba24b8e674 100644
--- a/block.c
+++ b/block.c
@@ -1191,10 +1191,6 @@ static int bdrv_fill_options(QDict **options, const char *filename,
}
}
- if (runstate_check(RUN_STATE_INMIGRATE)) {
- *flags |= BDRV_O_INACTIVE;
- }
-
return 0;
}
diff --git a/block/qapi.c b/block/qapi.c
index 67891b7d19..db2d3fb915 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -92,6 +92,26 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+ info->has_bps_max_length = info->has_bps_max;
+ info->bps_max_length =
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
+ info->has_bps_rd_max_length = info->has_bps_rd_max;
+ info->bps_rd_max_length =
+ cfg.buckets[THROTTLE_BPS_READ].burst_length;
+ info->has_bps_wr_max_length = info->has_bps_wr_max;
+ info->bps_wr_max_length =
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
+
+ info->has_iops_max_length = info->has_iops_max;
+ info->iops_max_length =
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
+ info->has_iops_rd_max_length = info->has_iops_rd_max;
+ info->iops_rd_max_length =
+ cfg.buckets[THROTTLE_OPS_READ].burst_length;
+ info->has_iops_wr_max_length = info->has_iops_wr_max;
+ info->iops_wr_max_length =
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
+
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
diff --git a/block/quorum.c b/block/quorum.c
index a5ae4b812b..11cc60b713 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -286,7 +286,8 @@ static void quorum_aio_cb(void *opaque, int ret)
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
- if (ret < 0 && ++acb->child_iter < s->num_children) {
+ if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
+ acb->child_iter++;
read_fifo_child(acb);
return;
}
diff --git a/blockdev.c b/blockdev.c
index 1f7347821c..d4bc435940 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -343,29 +343,6 @@ static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
return true;
}
-static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
-{
- if (throttle_conflicting(cfg)) {
- error_setg(errp, "bps/iops/max total values and read/write values"
- " cannot be used at the same time");
- return false;
- }
-
- if (!throttle_is_valid(cfg)) {
- error_setg(errp, "bps/iops/max values must be within [0, %lld]",
- THROTTLE_VALUE_MAX);
- return false;
- }
-
- if (throttle_max_is_missing_limit(cfg)) {
- error_setg(errp, "bps_max/iops_max require corresponding"
- " bps/iops values");
- return false;
- }
-
- return true;
-}
-
typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
/* All parameters but @opts are optional and may be set to NULL. */
@@ -410,7 +387,7 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
}
if (throttle_cfg) {
- memset(throttle_cfg, 0, sizeof(*throttle_cfg));
+ throttle_config_init(throttle_cfg);
throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.bps-total", 0);
throttle_cfg->buckets[THROTTLE_BPS_READ].avg =
@@ -437,10 +414,23 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+ throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
+
throttle_cfg->op_size =
qemu_opt_get_number(opts, "throttling.iops-size", 0);
- if (!check_throttle_config(throttle_cfg, errp)) {
+ if (!throttle_is_valid(throttle_cfg, errp)) {
return;
}
}
@@ -610,6 +600,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
}
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ bdrv_flags |= BDRV_O_INACTIVE;
+ }
+
blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
errp);
if (!blk) {
@@ -688,6 +682,10 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
goto fail;
}
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ bdrv_flags |= BDRV_O_INACTIVE;
+ }
+
bs = NULL;
ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
if (ret < 0) {
@@ -2515,6 +2513,8 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
}
bdrv_flags = blk_get_open_flags_from_root_state(blk);
+ bdrv_flags &= ~(BDRV_O_TEMPORARY | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING |
+ BDRV_O_PROTOCOL);
if (!has_read_only) {
read_only = BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN;
@@ -2600,6 +2600,18 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
int64_t iops_rd_max,
bool has_iops_wr_max,
int64_t iops_wr_max,
+ bool has_bps_max_length,
+ int64_t bps_max_length,
+ bool has_bps_rd_max_length,
+ int64_t bps_rd_max_length,
+ bool has_bps_wr_max_length,
+ int64_t bps_wr_max_length,
+ bool has_iops_max_length,
+ int64_t iops_max_length,
+ bool has_iops_rd_max_length,
+ int64_t iops_rd_max_length,
+ bool has_iops_wr_max_length,
+ int64_t iops_wr_max_length,
bool has_iops_size,
int64_t iops_size,
bool has_group,
@@ -2626,7 +2638,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
goto out;
}
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd;
cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
@@ -2654,11 +2666,30 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
}
+ if (has_bps_max_length) {
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = bps_max_length;
+ }
+ if (has_bps_rd_max_length) {
+ cfg.buckets[THROTTLE_BPS_READ].burst_length = bps_rd_max_length;
+ }
+ if (has_bps_wr_max_length) {
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length = bps_wr_max_length;
+ }
+ if (has_iops_max_length) {
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = iops_max_length;
+ }
+ if (has_iops_rd_max_length) {
+ cfg.buckets[THROTTLE_OPS_READ].burst_length = iops_rd_max_length;
+ }
+ if (has_iops_wr_max_length) {
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length = iops_wr_max_length;
+ }
+
if (has_iops_size) {
cfg.op_size = iops_size;
}
- if (!check_throttle_config(&cfg, errp)) {
+ if (!throttle_is_valid(&cfg, errp)) {
goto out;
}
@@ -4087,6 +4118,30 @@ QemuOptsList qemu_common_drive_opts = {
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
+ .name = "throttling.iops-total-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-total-max burst period, in seconds",
+ },{
+ .name = "throttling.iops-read-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-read-max burst period, in seconds",
+ },{
+ .name = "throttling.iops-write-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-write-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-total-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-total-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-read-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-read-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-write-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-write-max burst period, in seconds",
+ },{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
.help = "when limiting by iops max size of an I/O in bytes",
diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index f236d8c6d9..80cdfd0e91 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -103,7 +103,18 @@ in the description of a field.
write to an image with unknown auto-clear features if it
clears the respective bits from this field first.
- Bits 0-63: Reserved (set to 0)
+ Bit 0: Bitmaps extension bit
+ This bit indicates consistency for the bitmaps
+ extension data.
+
+ It is an error if this bit is set without the
+ bitmaps extension present.
+
+ If the bitmaps extension is present but this
+ bit is unset, the bitmaps extension data must be
+ considered inconsistent.
+
+ Bits 1-63: Reserved (set to 0)
96 - 99: refcount_order
Describes the width of a reference count block entry (width
@@ -123,6 +134,7 @@ be stored. Each extension has a structure like the following:
0x00000000 - End of the header extension area
0xE2792ACA - Backing file format name
0x6803f857 - Feature name table
+ 0x23852875 - Bitmaps extension
other - Unknown header extension, can be safely
ignored
@@ -166,6 +178,36 @@ the header extension data. Each entry look like this:
terminated if it has full length)
+== Bitmaps extension ==
+
+The bitmaps extension is an optional header extension. It provides the ability
+to store bitmaps related to a virtual disk. For now, there is only one bitmap
+type: the dirty tracking bitmap, which tracks virtual disk changes from some
+point in time.
+
+The data of the extension should be considered consistent only if the
+corresponding auto-clear feature bit is set, see autoclear_features above.
+
+The fields of the bitmaps extension are:
+
+ Byte 0 - 3: nb_bitmaps
+ The number of bitmaps contained in the image. Must be
+ greater than or equal to 1.
+
+ Note: Qemu currently only supports up to 65535 bitmaps per
+ image.
+
+ 4 - 7: Reserved, must be zero.
+
+ 8 - 15: bitmap_directory_size
+ Size of the bitmap directory in bytes. It is the cumulative
+ size of all (nb_bitmaps) bitmap headers.
+
+ 16 - 23: bitmap_directory_offset
+ Offset into the image file at which the bitmap directory
+ starts. Must be aligned to a cluster boundary.
+
+
== Host cluster management ==
qcow2 manages the allocation of host clusters by maintaining a reference count
@@ -360,3 +402,180 @@ Snapshot table entry:
variable: Padding to round up the snapshot table entry size to the
next multiple of 8.
+
+
+== Bitmaps ==
+
+As mentioned above, the bitmaps extension provides the ability to store bitmaps
+related to a virtual disk. This section describes how these bitmaps are stored.
+
+All stored bitmaps are related to the virtual disk stored in the same image, so
+each bitmap size is equal to the virtual disk size.
+
+Each bit of the bitmap is responsible for strictly defined range of the virtual
+disk. For bit number bit_nr the corresponding range (in bytes) will be:
+
+ [bit_nr * bitmap_granularity .. (bit_nr + 1) * bitmap_granularity - 1]
+
+Granularity is a property of the concrete bitmap, see below.
+
+
+=== Bitmap directory ===
+
+Each bitmap saved in the image is described in a bitmap directory entry. The
+bitmap directory is a contiguous area in the image file, whose starting offset
+and length are given by the header extension fields bitmap_directory_offset and
+bitmap_directory_size. The entries of the bitmap directory have variable
+length, depending on the lengths of the bitmap name and extra data. These
+entries are also called bitmap headers.
+
+Structure of a bitmap directory entry:
+
+ Byte 0 - 7: bitmap_table_offset
+ Offset into the image file at which the bitmap table
+ (described below) for the bitmap starts. Must be aligned to
+ a cluster boundary.
+
+ 8 - 11: bitmap_table_size
+ Number of entries in the bitmap table of the bitmap.
+
+ 12 - 15: flags
+ Bit
+ 0: in_use
+ The bitmap was not saved correctly and may be
+ inconsistent.
+
+ 1: auto
+ The bitmap must reflect all changes of the virtual
+ disk by any application that would write to this qcow2
+ file (including writes, snapshot switching, etc.). The
+ type of this bitmap must be 'dirty tracking bitmap'.
+
+ 2: extra_data_compatible
+ This flags is meaningful when the extra data is
+ unknown to the software (currently any extra data is
+ unknown to Qemu).
+ If it is set, the bitmap may be used as expected, extra
+ data must be left as is.
+ If it is not set, the bitmap must not be used, but
+ both it and its extra data be left as is.
+
+ Bits 3 - 31 are reserved and must be 0.
+
+ 16: type
+ This field describes the sort of the bitmap.
+ Values:
+ 1: Dirty tracking bitmap
+
+ Values 0, 2 - 255 are reserved.
+
+ 17: granularity_bits
+ Granularity bits. Valid values: 0 - 63.
+
+ Note: Qemu currently doesn't support granularity_bits
+ greater than 31.
+
+ Granularity is calculated as
+ granularity = 1 << granularity_bits
+
+ A bitmap's granularity is how many bytes of the image
+ accounts for one bit of the bitmap.
+
+ 18 - 19: name_size
+ Size of the bitmap name. Must be non-zero.
+
+ Note: Qemu currently doesn't support values greater than
+ 1023.
+
+ 20 - 23: extra_data_size
+ Size of type-specific extra data.
+
+ For now, as no extra data is defined, extra_data_size is
+ reserved and should be zero. If it is non-zero the
+ behavior is defined by extra_data_compatible flag.
+
+ variable: extra_data
+ Extra data for the bitmap, occupying extra_data_size bytes.
+ Extra data must never contain references to clusters or in
+ some other way allocate additional clusters.
+
+ variable: name
+ The name of the bitmap (not null terminated), occupying
+ name_size bytes. Must be unique among all bitmap names
+ within the bitmaps extension.
+
+ variable: Padding to round up the bitmap directory entry size to the
+ next multiple of 8. All bytes of the padding must be zero.
+
+
+=== Bitmap table ===
+
+Each bitmap is stored using a one-level structure (as opposed to two-level
+structures like for refcounts and guest clusters mapping) for the mapping of
+bitmap data to host clusters. This structure is called the bitmap table.
+
+Each bitmap table has a variable size (stored in the bitmap directory entry)
+and may use multiple clusters, however, it must be contiguous in the image
+file.
+
+Structure of a bitmap table entry:
+
+ Bit 0: Reserved and must be zero if bits 9 - 55 are non-zero.
+ If bits 9 - 55 are zero:
+ 0: Cluster should be read as all zeros.
+ 1: Cluster should be read as all ones.
+
+ 1 - 8: Reserved and must be zero.
+
+ 9 - 55: Bits 9 - 55 of the host cluster offset. Must be aligned to
+ a cluster boundary. If the offset is 0, the cluster is
+ unallocated; in that case, bit 0 determines how this
+ cluster should be treated during reads.
+
+ 56 - 63: Reserved and must be zero.
+
+
+=== Bitmap data ===
+
+As noted above, bitmap data is stored in separate clusters, described by the
+bitmap table. Given an offset (in bytes) into the bitmap data, the offset into
+the image file can be obtained as follows:
+
+ image_offset(bitmap_data_offset) =
+ bitmap_table[bitmap_data_offset / cluster_size] +
+ (bitmap_data_offset % cluster_size)
+
+This offset is not defined if bits 9 - 55 of bitmap table entry are zero (see
+above).
+
+Given an offset byte_nr into the virtual disk and the bitmap's granularity, the
+bit offset into the image file to the corresponding bit of the bitmap can be
+calculated like this:
+
+ bit_offset(byte_nr) =
+ image_offset(byte_nr / granularity / 8) * 8 +
+ (byte_nr / granularity) % 8
+
+If the size of the bitmap data is not a multiple of the cluster size then the
+last cluster of the bitmap data contains some unused tail bits. These bits must
+be zero.
+
+
+=== Dirty tracking bitmaps ===
+
+Bitmaps with 'type' field equal to one are dirty tracking bitmaps.
+
+When the virtual disk is in use dirty tracking bitmap may be 'enabled' or
+'disabled'. While the bitmap is 'enabled', all writes to the virtual disk
+should be reflected in the bitmap. A set bit in the bitmap means that the
+corresponding range of the virtual disk (see above) was written to while the
+bitmap was 'enabled'. An unset bit means that this range was not written to.
+
+The software doesn't have to sync the bitmap in the image file with its
+representation in RAM after each write. Flag 'in_use' should be set while the
+bitmap is not synced.
+
+In the image file the 'enabled' state is reflected by the 'auto' flag. If this
+flag is set, the software must consider the bitmap as 'enabled' and start
+tracking virtual disk changes to this bitmap from the first write to the
+virtual disk. If this flag is not set then the bitmap is disabled.
diff --git a/docs/throttle.txt b/docs/throttle.txt
new file mode 100644
index 0000000000..28204e46ca
--- /dev/null
+++ b/docs/throttle.txt
@@ -0,0 +1,252 @@
+The QEMU throttling infrastructure
+==================================
+Copyright (C) 2016 Igalia, S.L.
+Author: Alberto Garcia <berto@igalia.com>
+
+This work is licensed under the terms of the GNU GPL, version 2 or
+later. See the COPYING file in the top-level directory.
+
+Introduction
+------------
+QEMU includes a throttling module that can be used to set limits to
+I/O operations. The code itself is generic and independent of the I/O
+units, but it is currenly used to limit the number of bytes per second
+and operations per second (IOPS) when performing disk I/O.
+
+This document explains how to use the throttling code in QEMU, and how
+it works internally. The implementation is in throttle.c.
+
+
+Using throttling to limit disk I/O
+----------------------------------
+Two aspects of the disk I/O can be limited: the number of bytes per
+second and the number of operations per second (IOPS). For each one of
+them the user can set a global limit or separate limits for read and
+write operations. This gives us a total of six different parameters.
+
+I/O limits can be set using the throttling.* parameters of -drive, or
+using the QMP 'block_set_io_throttle' command. These are the names of
+the parameters for both cases:
+
+|-----------------------+-----------------------|
+| -drive | block_set_io_throttle |
+|-----------------------+-----------------------|
+| throttling.iops-total | iops |
+| throttling.iops-read | iops_rd |
+| throttling.iops-write | iops_wr |
+| throttling.bps-total | bps |
+| throttling.bps-read | bps_rd |
+| throttling.bps-write | bps_wr |
+|-----------------------+-----------------------|
+
+It is possible to set limits for both IOPS and bps and the same time,
+and for each case we can decide whether to have separate read and
+write limits or not, but note that if iops-total is set then neither
+iops-read nor iops-write can be set. The same applies to bps-total and
+bps-read/write.
+
+The default value of these parameters is 0, and it means 'unlimited'.
+
+In its most basic usage, the user can add a drive to QEMU with a limit
+of 100 IOPS with the following -drive line:
+
+ -drive file=hd0.qcow2,throttling.iops-total=100
+
+We can do the same using QMP. In this case all these parameters are
+mandatory, so we must set to 0 the ones that we don't want to limit:
+
+ { "execute": "block_set_io_throttle",
+ "arguments": {
+ "device": "virtio0",
+ "iops": 100,
+ "iops_rd": 0,
+ "iops_wr": 0,
+ "bps": 0,
+ "bps_rd": 0,
+ "bps_wr": 0
+ }
+ }
+
+
+I/O bursts
+----------
+In addition to the basic limits we have just seen, QEMU allows the
+user to do bursts of I/O for a configurable amount of time. A burst is
+an amount of I/O that can exceed the basic limit. Bursts are useful to
+allow better performance when there are peaks of activity (the OS
+boots, a service needs to be restarted) while keeping the average
+limits lower the rest of the time.
+
+Two parameters control bursts: their length and the maximum amount of
+I/O they allow. These two can be configured separately for each one of
+the six basic parameters described in the previous section, but in
+this section we'll use 'iops-total' as an example.
+
+The I/O limit during bursts is set using 'iops-total-max', and the
+maximum length (in seconds) is set with 'iops-total-max-length'. So if
+we want to configure a drive with a basic limit of 100 IOPS and allow
+bursts of 2000 IOPS for 60 seconds, we would do it like this (the line
+is split for clarity):
+
+ -drive file=hd0.qcow2,
+ throttling.iops-total=100,
+ throttling.iops-total-max=2000,
+ throttling.iops-total-max-length=60
+
+Or, with QMP:
+
+ { "execute": "block_set_io_throttle",
+ "arguments": {
+ "device": "virtio0",
+ "iops": 100,
+ "iops_rd": 0,
+ "iops_wr": 0,
+ "bps": 0,
+ "bps_rd": 0,
+ "bps_wr": 0,
+ "iops_max": 2000,
+ "iops_max_length": 60,
+ }
+ }
+
+With this, the user can perform I/O on hd0.qcow2 at a rate of 2000
+IOPS for 1 minute before it's throttled down to 100 IOPS.
+
+The user will be able to do bursts again if there's a sufficiently
+long period of time with unused I/O (see below for details).
+
+The default value for 'iops-total-max' is 0 and it means that bursts
+are not allowed. 'iops-total-max-length' can only be set if
+'iops-total-max' is set as well, and its default value is 1 second.
+
+Here's the complete list of parameters for configuring bursts:
+
+|----------------------------------+-----------------------|
+| -drive | block_set_io_throttle |
+|----------------------------------+-----------------------|
+| throttling.iops-total-max | iops_max |
+| throttling.iops-total-max-length | iops_max_length |
+| throttling.iops-read-max | iops_rd_max |
+| throttling.iops-read-max-length | iops_rd_max_length |
+| throttling.iops-write-max | iops_wr_max |
+| throttling.iops-write-max-length | iops_wr_max_length |
+| throttling.bps-total-max | bps_max |
+| throttling.bps-total-max-length | bps_max_length |
+| throttling.bps-read-max | bps_rd_max |
+| throttling.bps-read-max-length | bps_rd_max_length |
+| throttling.bps-write-max | bps_wr_max |
+| throttling.bps-write-max-length | bps_wr_max_length |
+|----------------------------------+-----------------------|
+
+
+Controlling the size of I/O operations
+--------------------------------------
+When applying IOPS limits all I/O operations are treated equally
+regardless of their size. This means that the user can take advantage
+of this in order to circumvent the limits and submit one huge I/O
+request instead of several smaller ones.
+
+QEMU provides a setting called throttling.iops-size to prevent this
+from happening. This setting specifies the size (in bytes) of an I/O
+request for accounting purposes. Larger requests will be counted
+proportionally to this size.
+
+For example, if iops-size is set to 4096 then an 8KB request will be
+counted as two, and a 6KB request will be counted as one and a
+half. This only applies to requests larger than iops-size: smaller
+requests will be always counted as one, no matter their size.
+
+The default value of iops-size is 0 and it means that the size of the
+requests is never taken into account when applying IOPS limits.
+
+
+Applying I/O limits to groups of disks
+--------------------------------------
+In all the examples so far we have seen how to apply limits to the I/O
+performed on individual drives, but QEMU allows grouping drives so
+they all share the same limits.
+
+The way it works is that each drive with I/O limits is assigned to a
+group named using the throttling.group parameter. If this parameter is
+not specified, then the device name (i.e. 'virtio0', 'ide0-hd0') will
+be used as the group name.
+
+Limits set using the throttling.* parameters discussed earlier in this
+document apply to the combined I/O of all members of a group.
+
+Consider this example:
+
+ -drive file=hd1.qcow2,throttling.iops-total=6000,throttling.group=foo
+ -drive file=hd2.qcow2,throttling.iops-total=6000,throttling.group=foo
+ -drive file=hd3.qcow2,throttling.iops-total=3000,throttling.group=bar
+ -drive file=hd4.qcow2,throttling.iops-total=6000,throttling.group=foo
+ -drive file=hd5.qcow2,throttling.iops-total=3000,throttling.group=bar
+ -drive file=hd6.qcow2,throttling.iops-total=5000
+
+Here hd1, hd2 and hd4 are all members of a group named 'foo' with a
+combined IOPS limit of 6000, and hd3 and hd5 are members of 'bar'. hd6
+is left alone (technically it is part of a 1-member group).
+
+Limits are applied in a round-robin fashion so if there are concurrent
+I/O requests on several drives of the same group they will be
+distributed evenly.
+
+When I/O limits are applied to an existing drive using the QMP command
+'block_set_io_throttle', the following things need to be taken into
+account:
+
+ - I/O limits are shared within the same group, so new values will
+ affect all members and overwrite the previous settings. In other
+ words: if different limits are applied to members of the same
+ group, the last one wins.
+
+ - If 'group' is unset it is assumed to be the current group of that
+ drive. If the drive is not in a group yet, it will be added to a
+ group named after the device name.
+
+ - If 'group' is set then the drive will be moved to that group if
+ it was member of a different one. In this case the limits
+ specified in the parameters will be applied to the new group
+ only.
+
+ - I/O limits can be disabled by setting all of them to 0. In this
+ case the device will be removed from its group and the rest of
+ its members will not be affected. The 'group' parameter is
+ ignored.
+
+
+The Leaky Bucket algorithm
+--------------------------
+I/O limits in QEMU are implemented using the leaky bucket algorithm
+(specifically the "Leaky bucket as a meter" variant).
+
+This algorithm uses the analogy of a bucket that leaks water
+constantly. The water that gets into the bucket represents the I/O
+that has been performed, and no more I/O is allowed once the bucket is
+full.
+
+To see the way this corresponds to the throttling parameters in QEMU,
+consider the following values:
+
+ iops-total=100
+ iops-total-max=2000
+ iops-total-max-length=60
+
+ - Water leaks from the bucket at a rate of 100 IOPS.
+ - Water can be added to the bucket at a rate of 2000 IOPS.
+ - The size of the bucket is 2000 x 60 = 120000
+ - If 'iops-total-max-length' is unset then the bucket size is 100.
+
+The bucket is initially empty, therefore water can be added until it's
+full at a rate of 2000 IOPS (the burst rate). Once the bucket is full
+we can only add as much water as it leaks, therefore the I/O rate is
+reduced to 100 IOPS. If we add less water than it leaks then the
+bucket will start to empty, allowing for bursts again.
+
+Note that since water is leaking from the bucket even during bursts,
+it will take a bit more than 60 seconds at 2000 IOPS to fill it
+up. After those 60 seconds the bucket will have leaked 60 x 100 =
+6000, allowing for 3 more seconds of I/O at 2000 IOPS.
+
+Also, due to the way the algorithm works, longer burst can be done at
+a lower I/O rate, e.g. 1000 IOPS during 120 seconds.
diff --git a/hmp.c b/hmp.c
index bfbd667033..d00c2d4a7c 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1414,6 +1414,18 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
0,
false,
0,
+ false, /* no burst length via HMP */
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
false, /* No default I/O size */
0,
false,
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index d0c98ed25b..63df69070a 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -2,7 +2,7 @@
* QEMU throttling infrastructure
*
* Copyright (C) Nodalink, EURL. 2013-2014
- * Copyright (C) Igalia, S.L. 2015
+ * Copyright (C) Igalia, S.L. 2015-2016
*
* Authors:
* Benoît Canet <benoit.canet@nodalink.com>
@@ -42,16 +42,47 @@ typedef enum {
} BucketType;
/*
- * The max parameter of the leaky bucket throttling algorithm can be used to
- * allow the guest to do bursts.
- * The max value is a pool of I/O that the guest can use without being throttled
- * at all. Throttling is triggered once this pool is empty.
+ * This module implements I/O limits using the leaky bucket
+ * algorithm. The code is independent of the I/O units, but it is
+ * currently used for bytes per second and operations per second.
+ *
+ * Three parameters can be set by the user:
+ *
+ * - avg: the desired I/O limits in units per second.
+ * - max: the limit during bursts, also in units per second.
+ * - burst_length: the maximum length of the burst period, in seconds.
+ *
+ * Here's how it works:
+ *
+ * - The bucket level (number of performed I/O units) is kept in
+ * bkt.level and leaks at a rate of bkt.avg units per second.
+ *
+ * - The size of the bucket is bkt.max * bkt.burst_length. Once the
+ * bucket is full no more I/O is performed until the bucket leaks
+ * again. This is what makes the I/O rate bkt.avg.
+ *
+ * - The bkt.avg rate does not apply until the bucket is full,
+ * allowing the user to do bursts until then. The I/O limit during
+ * bursts is bkt.max. To enforce this limit we keep an additional
+ * bucket in bkt.burst_length that leaks at a rate of bkt.max units
+ * per second.
+ *
+ * - Because of all of the above, the user can perform I/O at a
+ * maximum of bkt.max units per second for at most bkt.burst_length
+ * seconds in a row. After that the bucket will be full and the I/O
+ * rate will go down to bkt.avg.
+ *
+ * - Since the bucket always leaks at a rate of bkt.avg, this also
+ * determines how much the user needs to wait before being able to
+ * do bursts again.
*/
typedef struct LeakyBucket {
double avg; /* average goal in units per second */
double max; /* leaky bucket max burst in units */
double level; /* bucket level in units */
+ double burst_level; /* bucket level in units (for computing bursts) */
+ unsigned burst_length; /* max length of the burst period, in seconds */
} LeakyBucket;
/* The following structure is used to configure a ThrottleState
@@ -84,12 +115,6 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta);
int64_t throttle_compute_wait(LeakyBucket *bkt);
-/* expose timer computation function for unit tests */
-bool throttle_compute_timer(ThrottleState *ts,
- bool is_write,
- int64_t now,
- int64_t *next_timestamp);
-
/* init/destroy cycle */
void throttle_init(ThrottleState *ts);
@@ -112,11 +137,7 @@ bool throttle_timers_are_initialized(ThrottleTimers *tt);
/* configuration */
bool throttle_enabled(ThrottleConfig *cfg);
-bool throttle_conflicting(ThrottleConfig *cfg);
-
-bool throttle_is_valid(ThrottleConfig *cfg);
-
-bool throttle_max_is_missing_limit(ThrottleConfig *cfg);
+bool throttle_is_valid(ThrottleConfig *cfg, Error **errp);
void throttle_config(ThrottleState *ts,
ThrottleTimers *tt,
@@ -124,6 +145,8 @@ void throttle_config(ThrottleState *ts,
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg);
+void throttle_config_init(ThrottleConfig *cfg);
+
/* usage */
bool throttle_schedule_timer(ThrottleState *ts,
ThrottleTimers *tt,
diff --git a/migration/block.c b/migration/block.c
index a444058cf7..3a8330a4b3 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -786,6 +786,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
int64_t addr;
BlockDriverState *bs, *bs_prev = NULL;
BlockBackend *blk;
+ Error *local_err = NULL;
uint8_t *buf;
int64_t total_sectors = 0;
int nr_sectors;
@@ -824,6 +825,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
device_name);
return -EINVAL;
}
+
+ bdrv_invalidate_cache(bs, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
}
if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 33012b86c1..9bf1b22b72 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -273,17 +273,41 @@
#
# @image: the info of image used (since: 1.6)
#
-# @bps_max: #optional total max in bytes (Since 1.7)
+# @bps_max: #optional total throughput limit during bursts,
+# in bytes (Since 1.7)
#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
+# @bps_rd_max: #optional read throughput limit during bursts,
+# in bytes (Since 1.7)
#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
+# @bps_wr_max: #optional write throughput limit during bursts,
+# in bytes (Since 1.7)
#
-# @iops_max: #optional total I/O operations max (Since 1.7)
+# @iops_max: #optional total I/O operations per second during bursts,
+# in bytes (Since 1.7)
#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+# @iops_rd_max: #optional read I/O operations per second during bursts,
+# in bytes (Since 1.7)
#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+# @iops_wr_max: #optional write I/O operations per second during bursts,
+# in bytes (Since 1.7)
+#
+# @bps_max_length: #optional maximum length of the @bps_max burst
+# period, in seconds. (Since 2.6)
+#
+# @bps_rd_max_length: #optional maximum length of the @bps_rd_max
+# burst period, in seconds. (Since 2.6)
+#
+# @bps_wr_max_length: #optional maximum length of the @bps_wr_max
+# burst period, in seconds. (Since 2.6)
+#
+# @iops_max_length: #optional maximum length of the @iops burst
+# period, in seconds. (Since 2.6)
+#
+# @iops_rd_max_length: #optional maximum length of the @iops_rd_max
+# burst period, in seconds. (Since 2.6)
+#
+# @iops_wr_max_length: #optional maximum length of the @iops_wr_max
+# burst period, in seconds. (Since 2.6)
#
# @iops_size: #optional an I/O size in bytes (Since 1.7)
#
@@ -308,6 +332,9 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*bps_max_length': 'int', '*bps_rd_max_length': 'int',
+ '*bps_wr_max_length': 'int', '*iops_max_length': 'int',
+ '*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
'*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo',
'write_threshold': 'int' } }
@@ -1294,21 +1321,57 @@
#
# @iops: total I/O operations per second
#
-# @ops_rd: read I/O operations per second
+# @iops_rd: read I/O operations per second
#
# @iops_wr: write I/O operations per second
#
-# @bps_max: #optional total max in bytes (Since 1.7)
+# @bps_max: #optional total throughput limit during bursts,
+# in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read throughput limit during bursts,
+# in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write throughput limit during bursts,
+# in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations per second during bursts,
+# in bytes (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations per second during bursts,
+# in bytes (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations per second during bursts,
+# in bytes (Since 1.7)
+#
+# @bps_max_length: #optional maximum length of the @bps_max burst
+# period, in seconds. It must only
+# be set if @bps_max is set as well.
+# Defaults to 1. (Since 2.6)
#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
+# @bps_rd_max_length: #optional maximum length of the @bps_rd_max
+# burst period, in seconds. It must only
+# be set if @bps_rd_max is set as well.
+# Defaults to 1. (Since 2.6)
#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
+# @bps_wr_max_length: #optional maximum length of the @bps_wr_max
+# burst period, in seconds. It must only
+# be set if @bps_wr_max is set as well.
+# Defaults to 1. (Since 2.6)
#
-# @iops_max: #optional total I/O operations max (Since 1.7)
+# @iops_max_length: #optional maximum length of the @iops burst
+# period, in seconds. It must only
+# be set if @iops_max is set as well.
+# Defaults to 1. (Since 2.6)
#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+# @iops_rd_max_length: #optional maximum length of the @iops_rd_max
+# burst period, in seconds. It must only
+# be set if @iops_rd_max is set as well.
+# Defaults to 1. (Since 2.6)
#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+# @iops_wr_max_length: #optional maximum length of the @iops_wr_max
+# burst period, in seconds. It must only
+# be set if @iops_wr_max is set as well.
+# Defaults to 1. (Since 2.6)
#
# @iops_size: #optional an I/O size in bytes (Since 1.7)
#
@@ -1325,6 +1388,9 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*bps_max_length': 'int', '*bps_rd_max_length': 'int',
+ '*bps_wr_max_length': 'int', '*iops_max_length': 'int',
+ '*iops_rd_max_length': 'int', '*iops_wr_max_length': 'int',
'*iops_size': 'int', '*group': 'str' } }
##
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 95677745f9..e7cded6e24 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,68 +10,68 @@ STEXI
ETEXI
DEF("check", img_check,
- "check [-q] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
+ "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
STEXI
-@item check [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
+@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
ETEXI
DEF("create", img_create,
- "create [-q] [-f fmt] [-o options] filename [size]")
+ "create [-q] [--object objectdef] [--image-opts] [-f fmt] [-o options] filename [size]")
STEXI
-@item create [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
+@item create [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
ETEXI
DEF("commit", img_commit,
- "commit [-q] [-f fmt] [-t cache] [-b base] [-d] [-p] filename")
+ "commit [-q] [--object objectdef] [--image-opts] [-f fmt] [-t cache] [-b base] [-d] [-p] filename")
STEXI
-@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
+@item commit [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename}
ETEXI
DEF("compare", img_compare,
- "compare [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2")
+ "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2")
STEXI
-@item compare [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2}
+@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2}
ETEXI
DEF("convert", img_convert,
- "convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
+ "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
STEXI
-@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("info", img_info,
- "info [-f fmt] [--output=ofmt] [--backing-chain] filename")
+ "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename")
STEXI
-@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
+@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
ETEXI
DEF("map", img_map,
- "map [-f fmt] [--output=ofmt] filename")
+ "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] filename")
STEXI
-@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
ETEXI
DEF("snapshot", img_snapshot,
- "snapshot [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
+ "snapshot [--object objectdef] [--image-opts] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
STEXI
-@item snapshot [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
+@item snapshot [--object @var{objectdef}] [--image-opts] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
ETEXI
DEF("rebase", img_rebase,
- "rebase [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+ "rebase [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
STEXI
-@item rebase [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
+@item rebase [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
ETEXI
DEF("resize", img_resize,
- "resize [-q] filename [+ | -]size")
+ "resize [--object objectdef] [--image-opts] [-q] filename [+ | -]size")
STEXI
-@item resize [-q] @var{filename} [+ | -]@var{size}
+@item resize [--object @var{objectdef}] [--image-opts] [-q] @var{filename} [+ | -]@var{size}
ETEXI
DEF("amend", img_amend,
- "amend [-p] [-q] [-f fmt] [-t cache] -o options filename")
+ "amend [--object objectdef] [--image-opts] [-p] [-q] [-f fmt] [-t cache] -o options filename")
STEXI
-@item amend [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
+@item amend [--object @var{objectdef}] [--image-opts] [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename}
@end table
ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 7030107da2..2edb139073 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -27,8 +27,10 @@
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu-common.h"
+#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/error-report.h"
+#include "qom/object_interfaces.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
@@ -47,6 +49,8 @@ typedef struct img_cmd_t {
enum {
OPTION_OUTPUT = 256,
OPTION_BACKING_CHAIN = 257,
+ OPTION_OBJECT = 258,
+ OPTION_IMAGE_OPTS = 259,
};
typedef enum OutputFormat {
@@ -94,6 +98,10 @@ static void QEMU_NORETURN help(void)
"\n"
"Command parameters:\n"
" 'filename' is a disk image filename\n"
+ " 'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
+ " manual page for a description of the object properties. The most common\n"
+ " object type is a 'secret', which is used to supply passwords and/or\n"
+ " encryption keys.\n"
" 'fmt' is the disk image format. It is guessed automatically in most cases\n"
" 'cache' is the cache mode used to write the output disk image, the valid\n"
" options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
@@ -154,6 +162,24 @@ static void QEMU_NORETURN help(void)
exit(EXIT_SUCCESS);
}
+static QemuOptsList qemu_object_opts = {
+ .name = "object",
+ .implied_opt_name = "qom-type",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+ .desc = {
+ { }
+ },
+};
+
+static QemuOptsList qemu_source_opts = {
+ .name = "source",
+ .implied_opt_name = "file",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
+ .desc = {
+ { }
+ },
+};
+
static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
int ret = 0;
@@ -196,13 +222,56 @@ static int print_block_option_help(const char *filename, const char *fmt)
return 0;
}
-static BlockBackend *img_open(const char *id, const char *filename,
- const char *fmt, int flags,
- bool require_io, bool quiet)
+
+static int img_open_password(BlockBackend *blk, const char *filename,
+ bool require_io, bool quiet)
{
- BlockBackend *blk;
BlockDriverState *bs;
char password[256];
+
+ bs = blk_bs(blk);
+ if (bdrv_is_encrypted(bs) && require_io) {
+ qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
+ if (qemu_read_password(password, sizeof(password)) < 0) {
+ error_report("No password given");
+ return -1;
+ }
+ if (bdrv_set_key(bs, password) < 0) {
+ error_report("invalid password");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+
+static BlockBackend *img_open_opts(const char *id,
+ const char *optstr,
+ QemuOpts *opts, int flags,
+ bool require_io, bool quiet)
+{
+ QDict *options;
+ Error *local_err = NULL;
+ BlockBackend *blk;
+ options = qemu_opts_to_qdict(opts, NULL);
+ blk = blk_new_open(id, NULL, NULL, options, flags, &local_err);
+ if (!blk) {
+ error_reportf_err(local_err, "Could not open '%s'", optstr);
+ return NULL;
+ }
+
+ if (img_open_password(blk, optstr, require_io, quiet) < 0) {
+ blk_unref(blk);
+ return NULL;
+ }
+ return blk;
+}
+
+static BlockBackend *img_open_file(const char *id, const char *filename,
+ const char *fmt, int flags,
+ bool require_io, bool quiet)
+{
+ BlockBackend *blk;
Error *local_err = NULL;
QDict *options = NULL;
@@ -214,27 +283,43 @@ static BlockBackend *img_open(const char *id, const char *filename,
blk = blk_new_open(id, filename, NULL, options, flags, &local_err);
if (!blk) {
error_reportf_err(local_err, "Could not open '%s': ", filename);
- goto fail;
+ return NULL;
}
- bs = blk_bs(blk);
- if (bdrv_is_encrypted(bs) && require_io) {
- qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
- if (qemu_read_password(password, sizeof(password)) < 0) {
- error_report("No password given");
- goto fail;
- }
- if (bdrv_set_key(bs, password) < 0) {
- error_report("invalid password");
- goto fail;
- }
+ if (img_open_password(blk, filename, require_io, quiet) < 0) {
+ blk_unref(blk);
+ return NULL;
}
return blk;
-fail:
- blk_unref(blk);
- return NULL;
}
+
+static BlockBackend *img_open(const char *id,
+ bool image_opts,
+ const char *filename,
+ const char *fmt, int flags,
+ bool require_io, bool quiet)
+{
+ BlockBackend *blk;
+ if (image_opts) {
+ QemuOpts *opts;
+ if (fmt) {
+ error_report("--image-opts and --format are mutually exclusive");
+ return NULL;
+ }
+ opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
+ filename, true);
+ if (!opts) {
+ return NULL;
+ }
+ blk = img_open_opts(id, filename, opts, flags, true, quiet);
+ } else {
+ blk = img_open_file(id, filename, fmt, flags, true, quiet);
+ }
+ return blk;
+}
+
+
static int add_old_style_options(const char *fmt, QemuOpts *opts,
const char *base_filename,
const char *base_fmt)
@@ -275,7 +360,13 @@ static int img_create(int argc, char **argv)
bool quiet = false;
for(;;) {
- c = getopt(argc, argv, "F:b:f:he6o:q");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "F:b:f:he6o:q",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -317,6 +408,14 @@ static int img_create(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ goto fail;
+ }
+ } break;
}
}
@@ -332,6 +431,13 @@ static int img_create(int argc, char **argv)
}
optind++;
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ goto fail;
+ }
+
/* Get image size, if specified */
if (optind < argc) {
int64_t sval;
@@ -489,6 +595,8 @@ static int img_check(int argc, char **argv)
int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
ImageCheck *check;
bool quiet = false;
+ Error *local_err = NULL;
+ bool image_opts = false;
fmt = NULL;
output = NULL;
@@ -500,6 +608,8 @@ static int img_check(int argc, char **argv)
{"format", required_argument, 0, 'f'},
{"repair", required_argument, 0, 'r'},
{"output", required_argument, 0, OPTION_OUTPUT},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hf:r:T:q",
@@ -536,6 +646,17 @@ static int img_check(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
if (optind != argc - 1) {
@@ -552,13 +673,20 @@ static int img_check(int argc, char **argv)
return 1;
}
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
error_report("Invalid source cache option: %s", cache);
return 1;
}
- blk = img_open("image", filename, fmt, flags, true, quiet);
+ blk = img_open("image", image_opts, filename, fmt, flags, true, quiet);
if (!blk) {
return 1;
}
@@ -670,12 +798,20 @@ static int img_commit(int argc, char **argv)
bool progress = false, quiet = false, drop = false;
Error *local_err = NULL;
CommonBlockJobCBInfo cbi;
+ bool image_opts = false;
fmt = NULL;
cache = BDRV_DEFAULT_CACHE;
base = NULL;
for(;;) {
- c = getopt(argc, argv, "f:ht:b:dpq");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "f:ht:b:dpq",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -704,6 +840,17 @@ static int img_commit(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
@@ -717,6 +864,13 @@ static int img_commit(int argc, char **argv)
}
filename = argv[optind++];
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
flags = BDRV_O_RDWR | BDRV_O_UNMAP;
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
@@ -724,7 +878,7 @@ static int img_commit(int argc, char **argv)
return 1;
}
- blk = img_open("image", filename, fmt, flags, true, quiet);
+ blk = img_open("image", image_opts, filename, fmt, flags, true, quiet);
if (!blk) {
return 1;
}
@@ -973,10 +1127,19 @@ static int img_compare(int argc, char **argv)
int64_t nb_sectors;
int c, pnum;
uint64_t progress_base;
+ Error *local_err = NULL;
+ bool image_opts = false;
cache = BDRV_DEFAULT_CACHE;
for (;;) {
- c = getopt(argc, argv, "hf:F:T:pqs");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "hf:F:T:pqs",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -1003,6 +1166,18 @@ static int img_compare(int argc, char **argv)
case 's':
strict = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ ret = 2;
+ goto out4;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
@@ -1018,6 +1193,14 @@ static int img_compare(int argc, char **argv)
filename1 = argv[optind++];
filename2 = argv[optind++];
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ ret = 2;
+ goto out4;
+ }
+
/* Initialize before goto out */
qemu_progress_init(progress, 2.0);
@@ -1029,18 +1212,18 @@ static int img_compare(int argc, char **argv)
goto out3;
}
- blk1 = img_open("image_1", filename1, fmt1, flags, true, quiet);
+ blk1 = img_open("image_1", image_opts, filename1, fmt1, flags, true, quiet);
if (!blk1) {
ret = 2;
goto out3;
}
- bs1 = blk_bs(blk1);
- blk2 = img_open("image_2", filename2, fmt2, flags, true, quiet);
+ blk2 = img_open("image_2", image_opts, filename2, fmt2, flags, true, quiet);
if (!blk2) {
ret = 2;
goto out2;
}
+ bs1 = blk_bs(blk1);
bs2 = blk_bs(blk2);
buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
@@ -1225,6 +1408,7 @@ out2:
blk_unref(blk1);
out3:
qemu_progress_end();
+out4:
return ret;
}
@@ -1546,6 +1730,7 @@ static int img_convert(int argc, char **argv)
Error *local_err = NULL;
QemuOpts *sn_opts = NULL;
ImgConvertState state;
+ bool image_opts = false;
fmt = NULL;
out_fmt = "raw";
@@ -1555,7 +1740,14 @@ static int img_convert(int argc, char **argv)
compress = 0;
skip_create = 0;
for(;;) {
- c = getopt(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -1646,16 +1838,32 @@ static int img_convert(int argc, char **argv)
case 'n':
skip_create = 1;
break;
+ case OPTION_OBJECT:
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ goto fail_getopt;
+ }
+ break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ goto fail_getopt;
+ }
+
/* Initialize before goto out */
if (quiet) {
progress = 0;
}
qemu_progress_init(progress, 1.0);
-
bs_n = argc - optind - 1;
out_filename = bs_n >= 1 ? argv[argc - 1] : NULL;
@@ -1693,8 +1901,8 @@ static int img_convert(int argc, char **argv)
for (bs_i = 0; bs_i < bs_n; bs_i++) {
char *id = bs_n > 1 ? g_strdup_printf("source_%d", bs_i)
: g_strdup("source");
- blk[bs_i] = img_open(id, argv[optind + bs_i], fmt, src_flags,
- true, quiet);
+ blk[bs_i] = img_open(id, image_opts, argv[optind + bs_i],
+ fmt, src_flags, true, quiet);
g_free(id);
if (!blk[bs_i]) {
ret = -1;
@@ -1835,7 +2043,13 @@ static int img_convert(int argc, char **argv)
goto out;
}
- out_blk = img_open("target", out_filename, out_fmt, flags, true, quiet);
+ /* XXX we should allow --image-opts to trigger use of
+ * img_open() here, but then we have trouble with
+ * the bdrv_create() call which takes different params.
+ * Not critical right now, so fix can wait...
+ */
+ out_blk = img_open_file("target", out_filename,
+ out_fmt, flags, true, quiet);
if (!out_blk) {
ret = -1;
goto out;
@@ -2001,7 +2215,8 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b)
* image file. If there was an error a message will have been printed to
* stderr.
*/
-static ImageInfoList *collect_image_info_list(const char *filename,
+static ImageInfoList *collect_image_info_list(bool image_opts,
+ const char *filename,
const char *fmt,
bool chain)
{
@@ -2025,8 +2240,9 @@ static ImageInfoList *collect_image_info_list(const char *filename,
}
g_hash_table_insert(filenames, (gpointer)filename, NULL);
- blk = img_open("image", filename, fmt,
- BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
+ blk = img_open("image", image_opts, filename, fmt,
+ BDRV_O_FLAGS | BDRV_O_NO_BACKING,
+ false, false);
if (!blk) {
goto err;
}
@@ -2077,6 +2293,8 @@ static int img_info(int argc, char **argv)
bool chain = false;
const char *filename, *fmt, *output;
ImageInfoList *list;
+ Error *local_err = NULL;
+ bool image_opts = false;
fmt = NULL;
output = NULL;
@@ -2087,6 +2305,8 @@ static int img_info(int argc, char **argv)
{"format", required_argument, 0, 'f'},
{"output", required_argument, 0, OPTION_OUTPUT},
{"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:h",
@@ -2108,6 +2328,17 @@ static int img_info(int argc, char **argv)
case OPTION_BACKING_CHAIN:
chain = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
if (optind != argc - 1) {
@@ -2124,7 +2355,14 @@ static int img_info(int argc, char **argv)
return 1;
}
- list = collect_image_info_list(filename, fmt, chain);
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
+ list = collect_image_info_list(image_opts, filename, fmt, chain);
if (!list) {
return 1;
}
@@ -2194,6 +2432,7 @@ static int get_block_status(BlockDriverState *bs, int64_t sector_num,
int64_t ret;
int depth;
BlockDriverState *file;
+ bool has_offset;
/* As an optimization, we could cache the current range of unallocated
* clusters in each file of the chain, and avoid querying the same
@@ -2220,17 +2459,20 @@ static int get_block_status(BlockDriverState *bs, int64_t sector_num,
depth++;
}
- e->start = sector_num * BDRV_SECTOR_SIZE;
- e->length = nb_sectors * BDRV_SECTOR_SIZE;
- e->data = !!(ret & BDRV_BLOCK_DATA);
- e->zero = !!(ret & BDRV_BLOCK_ZERO);
- e->offset = ret & BDRV_BLOCK_OFFSET_MASK;
- e->has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
- e->depth = depth;
- if (file && e->has_offset) {
- e->has_filename = true;
- e->filename = file->filename;
- }
+ has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
+
+ *e = (MapEntry) {
+ .start = sector_num * BDRV_SECTOR_SIZE,
+ .length = nb_sectors * BDRV_SECTOR_SIZE,
+ .data = !!(ret & BDRV_BLOCK_DATA),
+ .zero = !!(ret & BDRV_BLOCK_ZERO),
+ .offset = ret & BDRV_BLOCK_OFFSET_MASK,
+ .has_offset = has_offset,
+ .depth = depth,
+ .has_filename = file && has_offset,
+ .filename = file && has_offset ? file->filename : NULL,
+ };
+
return 0;
}
@@ -2265,6 +2507,8 @@ static int img_map(int argc, char **argv)
int64_t length;
MapEntry curr = { .length = 0 }, next;
int ret = 0;
+ Error *local_err = NULL;
+ bool image_opts = false;
fmt = NULL;
output = NULL;
@@ -2274,6 +2518,8 @@ static int img_map(int argc, char **argv)
{"help", no_argument, 0, 'h'},
{"format", required_argument, 0, 'f'},
{"output", required_argument, 0, OPTION_OUTPUT},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:h",
@@ -2292,6 +2538,17 @@ static int img_map(int argc, char **argv)
case OPTION_OUTPUT:
output = optarg;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
if (optind != argc - 1) {
@@ -2308,7 +2565,15 @@ static int img_map(int argc, char **argv)
return 1;
}
- blk = img_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
+ blk = img_open("image", image_opts, filename, fmt,
+ BDRV_O_FLAGS, true, false);
if (!blk) {
return 1;
}
@@ -2370,11 +2635,19 @@ static int img_snapshot(int argc, char **argv)
qemu_timeval tv;
bool quiet = false;
Error *err = NULL;
+ bool image_opts = false;
bdrv_oflags = BDRV_O_FLAGS | BDRV_O_RDWR;
/* Parse commandline parameters */
for(;;) {
- c = getopt(argc, argv, "la:c:d:hq");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "la:c:d:hq",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -2418,6 +2691,17 @@ static int img_snapshot(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
@@ -2426,8 +2710,16 @@ static int img_snapshot(int argc, char **argv)
}
filename = argv[optind++];
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &err)) {
+ error_report_err(err);
+ return 1;
+ }
+
/* Open the image */
- blk = img_open("image", filename, NULL, bdrv_oflags, true, quiet);
+ blk = img_open("image", image_opts, filename, NULL,
+ bdrv_oflags, true, quiet);
if (!blk) {
return 1;
}
@@ -2491,6 +2783,7 @@ static int img_rebase(int argc, char **argv)
int progress = 0;
bool quiet = false;
Error *local_err = NULL;
+ bool image_opts = false;
/* Parse commandline parameters */
fmt = NULL;
@@ -2499,7 +2792,14 @@ static int img_rebase(int argc, char **argv)
out_baseimg = NULL;
out_basefmt = NULL;
for(;;) {
- c = getopt(argc, argv, "hf:F:b:upt:T:q");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -2532,6 +2832,17 @@ static int img_rebase(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
@@ -2547,6 +2858,13 @@ static int img_rebase(int argc, char **argv)
}
filename = argv[optind++];
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
qemu_progress_init(progress, 2.0);
qemu_progress_print(0, 100);
@@ -2570,7 +2888,7 @@ static int img_rebase(int argc, char **argv)
* Ignore the old backing file for unsafe rebase in case we want to correct
* the reference to a renamed or moved backing file.
*/
- blk = img_open("image", filename, fmt, flags, true, quiet);
+ blk = img_open("image", image_opts, filename, fmt, flags, true, quiet);
if (!blk) {
ret = -1;
goto out;
@@ -2807,6 +3125,8 @@ static int img_resize(int argc, char **argv)
bool quiet = false;
BlockBackend *blk = NULL;
QemuOpts *param;
+ Error *local_err = NULL;
+
static QemuOptsList resize_options = {
.name = "resize_options",
.head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
@@ -2820,6 +3140,7 @@ static int img_resize(int argc, char **argv)
}
},
};
+ bool image_opts = false;
/* Remove size from argv manually so that negative numbers are not treated
* as options by getopt. */
@@ -2833,7 +3154,14 @@ static int img_resize(int argc, char **argv)
/* Parse getopt arguments */
fmt = NULL;
for(;;) {
- c = getopt(argc, argv, "f:hq");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "f:hq",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -2848,6 +3176,17 @@ static int img_resize(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT: {
+ QemuOpts *opts;
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ return 1;
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
if (optind != argc - 1) {
@@ -2855,6 +3194,13 @@ static int img_resize(int argc, char **argv)
}
filename = argv[optind++];
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
/* Choose grow, shrink, or absolute resize mode */
switch (size[0]) {
case '+':
@@ -2882,8 +3228,8 @@ static int img_resize(int argc, char **argv)
n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
qemu_opts_del(param);
- blk = img_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
- true, quiet);
+ blk = img_open("image", image_opts, filename, fmt,
+ BDRV_O_FLAGS | BDRV_O_RDWR, true, quiet);
if (!blk) {
ret = -1;
goto out;
@@ -2942,10 +3288,19 @@ static int img_amend(int argc, char **argv)
bool quiet = false, progress = false;
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
+ Error *local_err = NULL;
+ bool image_opts = false;
cache = BDRV_DEFAULT_CACHE;
for (;;) {
- c = getopt(argc, argv, "ho:f:t:pq");
+ static const struct option long_options[] = {
+ {"help", no_argument, 0, 'h'},
+ {"object", required_argument, 0, OPTION_OBJECT},
+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+ {0, 0, 0, 0}
+ };
+ c = getopt_long(argc, argv, "ho:f:t:pq",
+ long_options, NULL);
if (c == -1) {
break;
}
@@ -2981,6 +3336,17 @@ static int img_amend(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case OPTION_OBJECT:
+ opts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!opts) {
+ ret = -1;
+ goto out_no_progress;
+ }
+ break;
+ case OPTION_IMAGE_OPTS:
+ image_opts = true;
+ break;
}
}
@@ -2988,6 +3354,14 @@ static int img_amend(int argc, char **argv)
error_exit("Must specify options (-o)");
}
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_err)) {
+ error_report_err(local_err);
+ ret = -1;
+ goto out_no_progress;
+ }
+
if (quiet) {
progress = false;
}
@@ -3014,7 +3388,7 @@ static int img_amend(int argc, char **argv)
goto out;
}
- blk = img_open("image", filename, fmt, flags, true, quiet);
+ blk = img_open("image", image_opts, filename, fmt, flags, true, quiet);
if (!blk) {
ret = -1;
goto out;
@@ -3111,6 +3485,9 @@ int main(int argc, char **argv)
}
cmdname = argv[1];
+ qemu_add_opts(&qemu_object_opts);
+ qemu_add_opts(&qemu_source_opts);
+
/* find the command */
for (cmd = img_cmds; cmd->name != NULL; cmd++) {
if (!strcmp(cmdname, cmd->name)) {
diff --git a/qemu-img.texi b/qemu-img.texi
index 7163a108e2..afaebdd408 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -24,6 +24,20 @@ Command parameters:
@table @var
@item filename
is a disk image filename
+
+@item --object @var{objectdef}
+
+is a QEMU user creatable object definition. See the @code{qemu(1)} manual
+page for a description of the object properties. The most common object
+type is a @code{secret}, which is used to supply passwords and/or encryption
+keys.
+
+@item --image-opts
+
+Indicates that the @var{filename} parameter is to be interpreted as a
+full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-f} and @var{-F} parameters.
+
@item fmt
is the disk image format. It is guessed automatically in most cases. See below
for a description of the supported disk formats.
diff --git a/qemu-io.c b/qemu-io.c
index 6c0c028f98..8c31257ac4 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -18,6 +18,7 @@
#include "qemu/config-file.h"
#include "qemu/readline.h"
#include "qapi/qmp/qstring.h"
+#include "qom/object_interfaces.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "trace/control.h"
@@ -31,6 +32,7 @@ static BlockBackend *qemuio_blk;
/* qemu-io commands passed using -c */
static int ncmdline;
static char **cmdline;
+static bool imageOpts;
static ReadLineState *readline_state;
@@ -150,6 +152,10 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
readonly = 1;
break;
case 'o':
+ if (imageOpts) {
+ printf("--image-opts and 'open -o' are mutually exclusive\n");
+ return 0;
+ }
if (!qemu_opts_parse_noisily(&empty_opts, optarg, false)) {
qemu_opts_reset(&empty_opts);
return 0;
@@ -165,6 +171,14 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
flags |= BDRV_O_RDWR;
}
+ if (imageOpts && (optind == argc - 1)) {
+ if (!qemu_opts_parse_noisily(&empty_opts, argv[optind], false)) {
+ qemu_opts_reset(&empty_opts);
+ return 0;
+ }
+ optind++;
+ }
+
qopts = qemu_opts_find(&empty_opts, NULL);
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
qemu_opts_reset(&empty_opts);
@@ -200,6 +214,8 @@ static void usage(const char *name)
"Usage: %s [-h] [-V] [-rsnm] [-f FMT] [-c STRING] ... [file]\n"
"QEMU Disk exerciser\n"
"\n"
+" --object OBJECTDEF define an object such as 'secret' for\n"
+" passwords and/or encryption keys\n"
" -c, --cmd STRING execute command with its arguments\n"
" from the given string\n"
" -f, --format FMT specifies the block driver to use\n"
@@ -361,24 +377,51 @@ static void reenable_tty_echo(void)
qemu_set_tty_echo(STDIN_FILENO, true);
}
+enum {
+ OPTION_OBJECT = 256,
+ OPTION_IMAGE_OPTS = 257,
+};
+
+static QemuOptsList qemu_object_opts = {
+ .name = "object",
+ .implied_opt_name = "qom-type",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+ .desc = {
+ { }
+ },
+};
+
+
+static QemuOptsList file_opts = {
+ .name = "file",
+ .implied_opt_name = "file",
+ .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
+ .desc = {
+ /* no elements => accept any params */
+ { /* end of list */ }
+ },
+};
+
int main(int argc, char **argv)
{
int readonly = 0;
const char *sopt = "hVc:d:f:rsnmgkt:T:";
const struct option lopt[] = {
- { "help", 0, NULL, 'h' },
- { "version", 0, NULL, 'V' },
- { "offset", 1, NULL, 'o' },
- { "cmd", 1, NULL, 'c' },
- { "format", 1, NULL, 'f' },
- { "read-only", 0, NULL, 'r' },
- { "snapshot", 0, NULL, 's' },
- { "nocache", 0, NULL, 'n' },
- { "misalign", 0, NULL, 'm' },
- { "native-aio", 0, NULL, 'k' },
- { "discard", 1, NULL, 'd' },
- { "cache", 1, NULL, 't' },
- { "trace", 1, NULL, 'T' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "offset", required_argument, NULL, 'o' },
+ { "cmd", required_argument, NULL, 'c' },
+ { "format", required_argument, NULL, 'f' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "snapshot", no_argument, NULL, 's' },
+ { "nocache", no_argument, NULL, 'n' },
+ { "misalign", no_argument, NULL, 'm' },
+ { "native-aio", no_argument, NULL, 'k' },
+ { "discard", required_argument, NULL, 'd' },
+ { "cache", required_argument, NULL, 't' },
+ { "trace", required_argument, NULL, 'T' },
+ { "object", required_argument, NULL, OPTION_OBJECT },
+ { "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS },
{ NULL, 0, NULL, 0 }
};
int c;
@@ -386,6 +429,7 @@ int main(int argc, char **argv)
int flags = BDRV_O_UNMAP;
Error *local_error = NULL;
QDict *opts = NULL;
+ const char *format = NULL;
#ifdef CONFIG_POSIX
signal(SIGPIPE, SIG_IGN);
@@ -395,6 +439,7 @@ int main(int argc, char **argv)
qemu_init_exec_dir(argv[0]);
module_call_init(MODULE_INIT_QOM);
+ qemu_add_opts(&qemu_object_opts);
bdrv_init();
while ((c = getopt_long(argc, argv, sopt, lopt, &opt_index)) != -1) {
@@ -412,10 +457,7 @@ int main(int argc, char **argv)
}
break;
case 'f':
- if (!opts) {
- opts = qdict_new();
- }
- qdict_put(opts, "driver", qstring_from_str(optarg));
+ format = optarg;
break;
case 'c':
add_user_command(optarg);
@@ -446,6 +488,17 @@ int main(int argc, char **argv)
case 'h':
usage(progname);
exit(0);
+ case OPTION_OBJECT: {
+ QemuOpts *qopts;
+ qopts = qemu_opts_parse_noisily(&qemu_object_opts,
+ optarg, true);
+ if (!qopts) {
+ exit(1);
+ }
+ } break;
+ case OPTION_IMAGE_OPTS:
+ imageOpts = true;
+ break;
default:
usage(progname);
exit(1);
@@ -457,11 +510,23 @@ int main(int argc, char **argv)
exit(1);
}
+ if (format && imageOpts) {
+ error_report("--image-opts and -f are mutually exclusive");
+ exit(1);
+ }
+
if (qemu_init_main_loop(&local_error)) {
error_report_err(local_error);
exit(1);
}
+ if (qemu_opts_foreach(&qemu_object_opts,
+ user_creatable_add_opts_foreach,
+ NULL, &local_error)) {
+ error_report_err(local_error);
+ exit(1);
+ }
+
/* initialize commands */
qemuio_add_command(&quit_cmd);
qemuio_add_command(&open_cmd);
@@ -482,7 +547,21 @@ int main(int argc, char **argv)
}
if ((argc - optind) == 1) {
- openfile(argv[optind], flags, opts);
+ if (imageOpts) {
+ QemuOpts *qopts = NULL;
+ qopts = qemu_opts_parse_noisily(&file_opts, argv[optind], false);
+ if (!qopts) {
+ exit(1);
+ }
+ opts = qemu_opts_to_qdict(qopts, NULL);
+ openfile(NULL, flags, opts);
+ } else {
+ if (format) {
+ opts = qdict_new();
+ qdict_put(opts, "driver", qstring_from_str(format));
+ }
+ openfile(argv[optind], flags, opts);
+ }
}
command_loop();
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 933ca4a368..efc427c4f4 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -37,12 +37,13 @@
#include <pthread.h>
#define SOCKET_PATH "/var/lock/qemu-nbd-%s"
-#define QEMU_NBD_OPT_CACHE 1
-#define QEMU_NBD_OPT_AIO 2
-#define QEMU_NBD_OPT_DISCARD 3
-#define QEMU_NBD_OPT_DETECT_ZEROES 4
-#define QEMU_NBD_OPT_OBJECT 5
-#define QEMU_NBD_OPT_TLSCREDS 6
+#define QEMU_NBD_OPT_CACHE 256
+#define QEMU_NBD_OPT_AIO 257
+#define QEMU_NBD_OPT_DISCARD 258
+#define QEMU_NBD_OPT_DETECT_ZEROES 259
+#define QEMU_NBD_OPT_OBJECT 260
+#define QEMU_NBD_OPT_TLSCREDS 261
+#define QEMU_NBD_OPT_IMAGE_OPTS 262
static NBDExport *exp;
static bool newproto;
@@ -105,6 +106,7 @@ static void usage(const char *name)
" --aio=MODE set AIO mode (native or threads)\n"
" --discard=MODE set discard mode (ignore, unmap)\n"
" --detect-zeroes=MODE set detect-zeroes mode (off, on, unmap)\n"
+" --image-opts treat FILE as a full set of image options\n"
"\n"
"Report bugs to <qemu-devel@nongnu.org>\n"
, name, NBD_DEFAULT_PORT, "DEVICE");
@@ -394,6 +396,16 @@ static SocketAddress *nbd_build_socket_address(const char *sockpath,
}
+static QemuOptsList file_opts = {
+ .name = "file",
+ .implied_opt_name = "file",
+ .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
+ .desc = {
+ /* no elements => accept any params */
+ { /* end of list */ }
+ },
+};
+
static QemuOptsList qemu_object_opts = {
.name = "object",
.implied_opt_name = "qom-type",
@@ -451,30 +463,32 @@ int main(int argc, char **argv)
const char *sn_id_or_name = NULL;
const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:";
struct option lopt[] = {
- { "help", 0, NULL, 'h' },
- { "version", 0, NULL, 'V' },
- { "bind", 1, NULL, 'b' },
- { "port", 1, NULL, 'p' },
- { "socket", 1, NULL, 'k' },
- { "offset", 1, NULL, 'o' },
- { "read-only", 0, NULL, 'r' },
- { "partition", 1, NULL, 'P' },
- { "connect", 1, NULL, 'c' },
- { "disconnect", 0, NULL, 'd' },
- { "snapshot", 0, NULL, 's' },
- { "load-snapshot", 1, NULL, 'l' },
- { "nocache", 0, NULL, 'n' },
- { "cache", 1, NULL, QEMU_NBD_OPT_CACHE },
- { "aio", 1, NULL, QEMU_NBD_OPT_AIO },
- { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD },
- { "detect-zeroes", 1, NULL, QEMU_NBD_OPT_DETECT_ZEROES },
- { "shared", 1, NULL, 'e' },
- { "format", 1, NULL, 'f' },
- { "persistent", 0, NULL, 't' },
- { "verbose", 0, NULL, 'v' },
- { "object", 1, NULL, QEMU_NBD_OPT_OBJECT },
- { "export-name", 1, NULL, 'x' },
- { "tls-creds", 1, NULL, QEMU_NBD_OPT_TLSCREDS },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "bind", required_argument, NULL, 'b' },
+ { "port", required_argument, NULL, 'p' },
+ { "socket", required_argument, NULL, 'k' },
+ { "offset", required_argument, NULL, 'o' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "partition", required_argument, NULL, 'P' },
+ { "connect", required_argument, NULL, 'c' },
+ { "disconnect", no_argument, NULL, 'd' },
+ { "snapshot", no_argument, NULL, 's' },
+ { "load-snapshot", required_argument, NULL, 'l' },
+ { "nocache", no_argument, NULL, 'n' },
+ { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
+ { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
+ { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
+ { "detect-zeroes", required_argument, NULL,
+ QEMU_NBD_OPT_DETECT_ZEROES },
+ { "shared", required_argument, NULL, 'e' },
+ { "format", required_argument, NULL, 'f' },
+ { "persistent", no_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
+ { "export-name", required_argument, NULL, 'x' },
+ { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
+ { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
{ NULL, 0, NULL, 0 }
};
int ch;
@@ -493,6 +507,7 @@ int main(int argc, char **argv)
QDict *options = NULL;
const char *export_name = NULL;
const char *tlscredsid = NULL;
+ bool imageOpts = false;
/* The client thread uses SIGTERM to interrupt the server. A signal
* handler ensures that "qemu-nbd -v -c" exits with a nice status code.
@@ -672,6 +687,9 @@ int main(int argc, char **argv)
case QEMU_NBD_OPT_TLSCREDS:
tlscredsid = optarg;
break;
+ case QEMU_NBD_OPT_IMAGE_OPTS:
+ imageOpts = true;
+ break;
}
}
@@ -800,13 +818,29 @@ int main(int argc, char **argv)
bdrv_init();
atexit(bdrv_close_all);
- if (fmt) {
- options = qdict_new();
- qdict_put(options, "driver", qstring_from_str(fmt));
+ srcpath = argv[optind];
+ if (imageOpts) {
+ QemuOpts *opts;
+ if (fmt) {
+ error_report("--image-opts and -f are mutually exclusive");
+ exit(EXIT_FAILURE);
+ }
+ opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
+ if (!opts) {
+ qemu_opts_reset(&file_opts);
+ exit(EXIT_FAILURE);
+ }
+ options = qemu_opts_to_qdict(opts, NULL);
+ qemu_opts_reset(&file_opts);
+ blk = blk_new_open("hda", NULL, NULL, options, flags, &local_err);
+ } else {
+ if (fmt) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str(fmt));
+ }
+ blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err);
}
- srcpath = argv[optind];
- blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err);
if (!blk) {
error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
argv[optind]);
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index 227a73ca36..9f23343450 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -13,7 +13,8 @@ Export a QEMU disk image using the NBD protocol.
@c man end
@c man begin OPTIONS
-@var{filename} is a disk image filename.
+@var{filename} is a disk image filename, or a set of block
+driver options if @var{--image-opts} is specified.
@var{dev} is an NBD device.
@@ -33,6 +34,10 @@ The offset into the image
The interface to bind to (default @samp{0.0.0.0})
@item -k, --socket=@var{path}
Use a unix socket with path @var{path}
+@item --image-opts
+Treat @var{filename} as a set of image options, instead of a plain
+filename. If this flag is specified, the @var{-f} flag should
+not be used, instead the '@code{format=}' option should be set.
@item -f, --format=@var{fmt}
Force the use of the block driver for format @var{fmt} instead of
auto-detecting
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 9fb0d788bc..085dc7d5cd 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2006,7 +2006,7 @@ EQMP
{
.name = "block_set_io_throttle",
- .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?,group:s?",
+ .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,bps_max_length:l?,bps_rd_max_length:l?,bps_wr_max_length:l?,iops_max_length:l?,iops_rd_max_length:l?,iops_wr_max_length:l?,iops_size:l?,group:s?",
.mhandler.cmd_new = qmp_marshal_block_set_io_throttle,
},
@@ -2025,14 +2025,20 @@ Arguments:
- "iops": total I/O operations per second (json-int)
- "iops_rd": read I/O operations per second (json-int)
- "iops_wr": write I/O operations per second (json-int)
-- "bps_max": total max in bytes (json-int)
-- "bps_rd_max": read max in bytes (json-int)
-- "bps_wr_max": write max in bytes (json-int)
-- "iops_max": total I/O operations max (json-int)
-- "iops_rd_max": read I/O operations max (json-int)
-- "iops_wr_max": write I/O operations max (json-int)
-- "iops_size": I/O size in bytes when limiting (json-int)
-- "group": throttle group name (json-string)
+- "bps_max": total throughput limit during bursts, in bytes (json-int, optional)
+- "bps_rd_max": read throughput limit during bursts, in bytes (json-int, optional)
+- "bps_wr_max": write throughput limit during bursts, in bytes (json-int, optional)
+- "iops_max": total I/O operations per second during bursts (json-int, optional)
+- "iops_rd_max": read I/O operations per second during bursts (json-int, optional)
+- "iops_wr_max": write I/O operations per second during bursts (json-int, optional)
+- "bps_max_length": maximum length of the @bps_max burst period, in seconds (json-int, optional)
+- "bps_rd_max_length": maximum length of the @bps_rd_max burst period, in seconds (json-int, optional)
+- "bps_wr_max_length": maximum length of the @bps_wr_max burst period, in seconds (json-int, optional)
+- "iops_max_length": maximum length of the @iops_max burst period, in seconds (json-int, optional)
+- "iops_rd_max_length": maximum length of the @iops_rd_max burst period, in seconds (json-int, optional)
+- "iops_wr_max_length": maximum length of the @iops_wr_max burst period, in seconds (json-int, optional)
+- "iops_size": I/O size in bytes when limiting (json-int, optional)
+- "group": throttle group name (json-string, optional)
Example:
@@ -2049,6 +2055,7 @@ Example:
"iops_max": 0,
"iops_rd_max": 0,
"iops_wr_max": 0,
+ "bps_max_length": 60,
"iops_size": 0 } }
<- { "return": {} }
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 3788534d67..77dec0d1fc 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,11 +45,20 @@ function do_run_qemu()
echo
}
+# Remove QMP events from (pretty-printed) output. Doesn't handle
+# nested dicts correctly, but we don't get any of those in this test.
+_filter_qmp_events()
+{
+ tr '\n' '\t' | sed -e \
+ 's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g' \
+ | tr '\t' '\n'
+}
+
function run_qemu()
{
do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
| sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' \
- | _filter_generated_node_ids
+ | _filter_generated_node_ids | _filter_qmp_events
}
size=128M
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index ae3fccb15f..7e25a49029 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -70,34 +70,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
}
}
{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "path": "/machine/peripheral/virtio0/virtio-backend"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "device": "virtio0",
- "path": "/machine/peripheral/virtio0"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "RESET"
-}
-{
"return": [
]
}
@@ -105,14 +77,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
"return": {
}
}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "SHUTDOWN"
-}
-
=== -drive/device_add and device_del ===
@@ -186,34 +150,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
}
}
{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "path": "/machine/peripheral/virtio0/virtio-backend"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "device": "virtio0",
- "path": "/machine/peripheral/virtio0"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "RESET"
-}
-{
"return": [
]
}
@@ -221,14 +157,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
"return": {
}
}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "SHUTDOWN"
-}
-
=== drive_add/device_add and device_del ===
@@ -305,34 +233,6 @@ Testing:
}
}
{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "path": "/machine/peripheral/virtio0/virtio-backend"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "device": "virtio0",
- "path": "/machine/peripheral/virtio0"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "RESET"
-}
-{
"return": [
]
}
@@ -340,14 +240,6 @@ Testing:
"return": {
}
}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "SHUTDOWN"
-}
-
=== blockdev_add/device_add and device_del ===
@@ -425,34 +317,6 @@ Testing:
}
}
{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "path": "/machine/peripheral/virtio0/virtio-backend"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "DEVICE_DELETED",
- "data": {
- "device": "virtio0",
- "path": "/machine/peripheral/virtio0"
- }
-}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "RESET"
-}
-{
"return": [
{
"io-status": "ok",
@@ -506,12 +370,4 @@ Testing:
"return": {
}
}
-{
- "timestamp": {
- "seconds": TIMESTAMP,
- "microseconds": TIMESTAMP
- },
- "event": "SHUTDOWN"
-}
-
*** done
diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093
index c0e9e2b0b5..ce8e13cb49 100755
--- a/tests/qemu-iotests/093
+++ b/tests/qemu-iotests/093
@@ -3,7 +3,7 @@
# Tests for IO throttling
#
# Copyright (C) 2015 Red Hat, Inc.
-# Copyright (C) 2015 Igalia, S.L.
+# Copyright (C) 2015-2016 Igalia, S.L.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@
import iotests
+nsec_per_sec = 1000000000
+
class ThrottleTestCase(iotests.QMPTestCase):
test_img = "null-aio://"
max_drives = 3
@@ -42,16 +44,7 @@ class ThrottleTestCase(iotests.QMPTestCase):
def tearDown(self):
self.vm.shutdown()
- def do_test_throttle(self, ndrives, seconds, params):
- def check_limit(limit, num):
- # IO throttling algorithm is discrete, allow 10% error so the test
- # is more robust
- return limit == 0 or \
- (num < seconds * limit * 1.1 / ndrives
- and num > seconds * limit * 0.9 / ndrives)
-
- nsec_per_sec = 1000000000
-
+ def configure_throttle(self, ndrives, params):
params['group'] = 'test'
# Set the I/O throttling parameters to all drives
@@ -60,13 +53,21 @@ class ThrottleTestCase(iotests.QMPTestCase):
result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params)
self.assert_qmp(result, 'return', {})
+ def do_test_throttle(self, ndrives, seconds, params):
+ def check_limit(limit, num):
+ # IO throttling algorithm is discrete, allow 10% error so the test
+ # is more robust
+ return limit == 0 or \
+ (num < seconds * limit * 1.1 / ndrives
+ and num > seconds * limit * 0.9 / ndrives)
+
# Set vm clock to a known value
ns = seconds * nsec_per_sec
self.vm.qtest("clock_step %d" % ns)
- # Submit enough requests. They will drain bps_max and iops_max, but the
- # rest requests won't get executed until we advance the virtual clock
- # with qtest interface
+ # Submit enough requests so the throttling mechanism kicks
+ # in. The throttled requests won't be executed until we
+ # advance the virtual clock.
rq_size = 512
rd_nr = max(params['bps'] / rq_size / 2,
params['bps_rd'] / rq_size,
@@ -142,8 +143,44 @@ class ThrottleTestCase(iotests.QMPTestCase):
for tk in params:
limits = dict([(k, 0) for k in params])
limits[tk] = params[tk] * ndrives
+ self.configure_throttle(ndrives, limits)
self.do_test_throttle(ndrives, 5, limits)
+ def test_burst(self):
+ params = {"bps": 4096,
+ "bps_rd": 4096,
+ "bps_wr": 4096,
+ "iops": 10,
+ "iops_rd": 10,
+ "iops_wr": 10,
+ }
+ ndrives = 1
+ # Pick each out of all possible params and test
+ for tk in params:
+ rate = params[tk] * ndrives
+ burst_rate = rate * 7
+ burst_length = 4
+
+ # Configure the throttling settings
+ settings = dict([(k, 0) for k in params])
+ settings[tk] = rate
+ settings['%s_max' % tk] = burst_rate
+ settings['%s_max_length' % tk] = burst_length
+ self.configure_throttle(ndrives, settings)
+
+ # Wait for the bucket to empty so we can do bursts
+ wait_ns = nsec_per_sec * burst_length * burst_rate / rate
+ self.vm.qtest("clock_step %d" % wait_ns)
+
+ # Test I/O at the max burst rate
+ limits = dict([(k, 0) for k in params])
+ limits[tk] = burst_rate
+ self.do_test_throttle(ndrives, burst_length, limits)
+
+ # Now test I/O at the normal rate
+ limits[tk] = rate
+ self.do_test_throttle(ndrives, 5, limits)
+
class ThrottleTestCoroutine(ThrottleTestCase):
test_img = "null-co://"
diff --git a/tests/qemu-iotests/093.out b/tests/qemu-iotests/093.out
index fbc63e62f8..89968f35d7 100644
--- a/tests/qemu-iotests/093.out
+++ b/tests/qemu-iotests/093.out
@@ -1,5 +1,5 @@
-..
+....
----------------------------------------------------------------------
-Ran 2 tests
+Ran 4 tests
OK
diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140
index f78c3175a3..05e4506676 100755
--- a/tests/qemu-iotests/140
+++ b/tests/qemu-iotests/140
@@ -1,6 +1,10 @@
#!/bin/bash
#
-# Test case for ejecting a BB with an NBD server attached to it
+# Test case for ejecting a BlockBackend with an NBD server attached to it
+#
+# Verify that the NBD server stops offering the drive when ejecting a
+# BlockDriverState tree from a BlockBackend (that is, a medium from a
+# drive) exposed via an NBD server.
#
# Copyright (C) 2016 Red Hat, Inc.
#
@@ -49,7 +53,7 @@ _make_test_img 64k
$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
keep_stderr=y \
-_launch_qemu -drive if=ide,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
+_launch_qemu -drive if=none,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
2> >(_filter_nbd)
_send_qemu_cmd $QEMU_HANDLE \
diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out
index 72f1b4cf1c..0409cd0174 100644
--- a/tests/qemu-iotests/140.out
+++ b/tests/qemu-iotests/140.out
@@ -7,7 +7,6 @@ wrote 65536/65536 bytes at offset 0
{"return": {}}
read 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "drv", "tray-open": true}}
{"return": {}}
can't open device nbd+unix:///drv?socket=TEST_DIR/nbd: No export with name 'drv' available
no file open, try 'help open'
diff --git a/tests/qemu-iotests/145 b/tests/qemu-iotests/145
new file mode 100755
index 0000000000..7d8febb8ce
--- /dev/null
+++ b/tests/qemu-iotests/145
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Test the combination of -incoming and snapshot=on
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+ true
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+_make_test_img 1M
+echo quit | $QEMU -nographic -hda "$TEST_IMG" -incoming 'exec:true' -snapshot -serial none -monitor stdio | _filter_qemu
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/145.out b/tests/qemu-iotests/145.out
new file mode 100644
index 0000000000..75b5c8ac36
--- /dev/null
+++ b/tests/qemu-iotests/145.out
@@ -0,0 +1,5 @@
+QA output created by 145
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) qququiquit
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 65df029d7d..47fd40c546 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -147,3 +147,4 @@
142 auto
143 auto quick
144 rw auto quick
+145 auto quick
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 858f1aa43f..59675fa57b 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -35,6 +35,9 @@ static bool double_cmp(double x, double y)
/* tests for single bucket operations */
static void test_leak_bucket(void)
{
+ throttle_config_init(&cfg);
+ bkt = cfg.buckets[THROTTLE_BPS_TOTAL];
+
/* set initial value */
bkt.avg = 150;
bkt.max = 15;
@@ -57,13 +60,33 @@ static void test_leak_bucket(void)
g_assert(bkt.avg == 150);
g_assert(bkt.max == 15);
g_assert(double_cmp(bkt.level, 0));
+
+ /* check that burst_level leaks correctly */
+ bkt.burst_level = 6;
+ bkt.max = 250;
+ bkt.burst_length = 2; /* otherwise burst_level will not leak */
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
+ g_assert(double_cmp(bkt.burst_level, 3.5));
+
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
+ g_assert(double_cmp(bkt.burst_level, 1));
+
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
+ g_assert(double_cmp(bkt.burst_level, 0));
+
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 100);
+ g_assert(double_cmp(bkt.burst_level, 0));
}
static void test_compute_wait(void)
{
+ unsigned i;
int64_t wait;
int64_t result;
+ throttle_config_init(&cfg);
+ bkt = cfg.buckets[THROTTLE_BPS_TOTAL];
+
/* no operation limit set */
bkt.avg = 0;
bkt.max = 15;
@@ -93,6 +116,27 @@ static void test_compute_wait(void)
/* time required to do half an operation */
result = (int64_t) NANOSECONDS_PER_SECOND / 150 / 2;
g_assert(wait == result);
+
+ /* Perform I/O for 2.2 seconds at a rate of bkt.max */
+ bkt.burst_length = 2;
+ bkt.level = 0;
+ bkt.avg = 10;
+ bkt.max = 200;
+ for (i = 0; i < 22; i++) {
+ double units = bkt.max / 10;
+ bkt.level += units;
+ bkt.burst_level += units;
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 10);
+ wait = throttle_compute_wait(&bkt);
+ g_assert(double_cmp(bkt.burst_level, 0));
+ g_assert(double_cmp(bkt.level, (i + 1) * (bkt.max - bkt.avg) / 10));
+ /* We can do bursts for the 2 seconds we have configured in
+ * burst_length. We have 100 extra miliseconds of burst
+ * because bkt.level has been leaking during this time.
+ * After that, we have to wait. */
+ result = i < 21 ? 0 : 1.8 * NANOSECONDS_PER_SECOND;
+ g_assert(wait == result);
+ }
}
/* functions to test ThrottleState initialization/destroy methods */
@@ -222,6 +266,8 @@ static void set_cfg_value(bool is_max, int index, int value)
{
if (is_max) {
cfg.buckets[index].max = value;
+ /* If max is set, avg should never be 0 */
+ cfg.buckets[index].avg = MAX(cfg.buckets[index].avg, 1);
} else {
cfg.buckets[index].avg = value;
}
@@ -231,17 +277,17 @@ static void test_enabled(void)
{
int i;
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
g_assert(!throttle_enabled(&cfg));
for (i = 0; i < BUCKETS_COUNT; i++) {
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(false, i, 150);
g_assert(throttle_enabled(&cfg));
}
for (i = 0; i < BUCKETS_COUNT; i++) {
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(false, i, -150);
g_assert(!throttle_enabled(&cfg));
}
@@ -254,32 +300,32 @@ static void test_conflicts_for_one_set(bool is_max,
int read,
int write)
{
- memset(&cfg, 0, sizeof(cfg));
- g_assert(!throttle_conflicting(&cfg));
+ throttle_config_init(&cfg);
+ g_assert(throttle_is_valid(&cfg, NULL));
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, read, 1);
- g_assert(throttle_conflicting(&cfg));
+ g_assert(!throttle_is_valid(&cfg, NULL));
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, write, 1);
- g_assert(throttle_conflicting(&cfg));
+ g_assert(!throttle_is_valid(&cfg, NULL));
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
set_cfg_value(is_max, read, 1);
set_cfg_value(is_max, write, 1);
- g_assert(throttle_conflicting(&cfg));
+ g_assert(!throttle_is_valid(&cfg, NULL));
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(is_max, total, 1);
- g_assert(!throttle_conflicting(&cfg));
+ g_assert(throttle_is_valid(&cfg, NULL));
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(is_max, read, 1);
set_cfg_value(is_max, write, 1);
- g_assert(!throttle_conflicting(&cfg));
+ g_assert(throttle_is_valid(&cfg, NULL));
}
static void test_conflicting_config(void)
@@ -313,9 +359,9 @@ static void test_is_valid_for_value(int value, bool should_be_valid)
int is_max, index;
for (is_max = 0; is_max < 2; is_max++) {
for (index = 0; index < BUCKETS_COUNT; index++) {
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
set_cfg_value(is_max, index, value);
- g_assert(throttle_is_valid(&cfg) == should_be_valid);
+ g_assert(throttle_is_valid(&cfg, NULL) == should_be_valid);
}
}
}
@@ -335,18 +381,18 @@ static void test_max_is_missing_limit(void)
int i;
for (i = 0; i < BUCKETS_COUNT; i++) {
- memset(&cfg, 0, sizeof(cfg));
+ throttle_config_init(&cfg);
cfg.buckets[i].max = 100;
cfg.buckets[i].avg = 0;
- g_assert(throttle_max_is_missing_limit(&cfg));
+ g_assert(!throttle_is_valid(&cfg, NULL));
cfg.buckets[i].max = 0;
cfg.buckets[i].avg = 0;
- g_assert(!throttle_max_is_missing_limit(&cfg));
+ g_assert(throttle_is_valid(&cfg, NULL));
cfg.buckets[i].max = 0;
cfg.buckets[i].avg = 100;
- g_assert(!throttle_max_is_missing_limit(&cfg));
+ g_assert(throttle_is_valid(&cfg, NULL));
}
}
@@ -550,7 +596,7 @@ static void test_groups(void)
g_assert(bdrv1->throttle_state == bdrv3->throttle_state);
/* Setting the config of a group member affects the whole group */
- memset(&cfg1, 0, sizeof(cfg1));
+ throttle_config_init(&cfg1);
cfg1.buckets[THROTTLE_BPS_READ].avg = 500000;
cfg1.buckets[THROTTLE_BPS_WRITE].avg = 285000;
cfg1.buckets[THROTTLE_OPS_READ].avg = 20000;
diff --git a/util/throttle.c b/util/throttle.c
index 2f9b23d925..371c769455 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -41,6 +41,14 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
/* make the bucket leak */
bkt->level = MAX(bkt->level - leak, 0);
+
+ /* if we allow bursts for more than one second we also need to
+ * keep track of bkt->burst_level so the bkt->max goal per second
+ * is attained */
+ if (bkt->burst_length > 1) {
+ leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+ bkt->burst_level = MAX(bkt->burst_level - leak, 0);
+ }
}
/* Calculate the time delta since last leak and make proportionals leaks
@@ -91,13 +99,24 @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
return 0;
}
- extra = bkt->level - bkt->max;
+ /* If the bucket is full then we have to wait */
+ extra = bkt->level - bkt->max * bkt->burst_length;
+ if (extra > 0) {
+ return throttle_do_compute_wait(bkt->avg, extra);
+ }
- if (extra <= 0) {
- return 0;
+ /* If the bucket is not full yet we have to make sure that we
+ * fulfill the goal of bkt->max units per second. */
+ if (bkt->burst_length > 1) {
+ /* We use 1/10 of the max value to smooth the throttling.
+ * See throttle_fix_bucket() for more details. */
+ extra = bkt->burst_level - bkt->max / 10;
+ if (extra > 0) {
+ return throttle_do_compute_wait(bkt->max, extra);
+ }
}
- return throttle_do_compute_wait(bkt->avg, extra);
+ return 0;
}
/* This function compute the time that must be waited while this IO
@@ -137,10 +156,10 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
* @next_timestamp: the resulting timer
* @ret: true if a timer must be set
*/
-bool throttle_compute_timer(ThrottleState *ts,
- bool is_write,
- int64_t now,
- int64_t *next_timestamp)
+static bool throttle_compute_timer(ThrottleState *ts,
+ bool is_write,
+ int64_t now,
+ int64_t *next_timestamp)
{
int64_t wait;
@@ -171,10 +190,24 @@ void throttle_timers_attach_aio_context(ThrottleTimers *tt,
tt->write_timer_cb, tt->timer_opaque);
}
+/*
+ * Initialize the ThrottleConfig structure to a valid state
+ * @cfg: the config to initialize
+ */
+void throttle_config_init(ThrottleConfig *cfg)
+{
+ unsigned i;
+ memset(cfg, 0, sizeof(*cfg));
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ cfg->buckets[i].burst_length = 1;
+ }
+}
+
/* To be called first on the ThrottleState */
void throttle_init(ThrottleState *ts)
{
memset(ts, 0, sizeof(ThrottleState));
+ throttle_config_init(&ts->cfg);
}
/* To be called first on the ThrottleTimers */
@@ -248,13 +281,14 @@ bool throttle_enabled(ThrottleConfig *cfg)
return false;
}
-/* return true if any two throttling parameters conflicts
- *
+/* check if a throttling configuration is valid
* @cfg: the throttling configuration to inspect
- * @ret: true if any conflict detected else false
+ * @ret: true if valid else false
+ * @errp: error object
*/
-bool throttle_conflicting(ThrottleConfig *cfg)
+bool throttle_is_valid(ThrottleConfig *cfg, Error **errp)
{
+ int i;
bool bps_flag, ops_flag;
bool bps_max_flag, ops_max_flag;
@@ -274,42 +308,40 @@ bool throttle_conflicting(ThrottleConfig *cfg)
(cfg->buckets[THROTTLE_OPS_READ].max ||
cfg->buckets[THROTTLE_OPS_WRITE].max);
- return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
-}
-
-/* check if a throttling configuration is valid
- * @cfg: the throttling configuration to inspect
- * @ret: true if valid else false
- */
-bool throttle_is_valid(ThrottleConfig *cfg)
-{
- int i;
+ if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) {
+ error_setg(errp, "bps/iops/max total values and read/write values"
+ " cannot be used at the same time");
+ return false;
+ }
for (i = 0; i < BUCKETS_COUNT; i++) {
if (cfg->buckets[i].avg < 0 ||
cfg->buckets[i].max < 0 ||
cfg->buckets[i].avg > THROTTLE_VALUE_MAX ||
cfg->buckets[i].max > THROTTLE_VALUE_MAX) {
+ error_setg(errp, "bps/iops/max values must be within [0, %lld]",
+ THROTTLE_VALUE_MAX);
return false;
}
- }
- return true;
-}
+ if (!cfg->buckets[i].burst_length) {
+ error_setg(errp, "the burst length cannot be 0");
+ return false;
+ }
-/* check if bps_max/iops_max is used without bps/iops
- * @cfg: the throttling configuration to inspect
- */
-bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
-{
- int i;
+ if (cfg->buckets[i].burst_length > 1 && !cfg->buckets[i].max) {
+ error_setg(errp, "burst length set without burst rate");
+ return false;
+ }
- for (i = 0; i < BUCKETS_COUNT; i++) {
if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
- return true;
+ error_setg(errp, "bps_max/iops_max require corresponding"
+ " bps/iops values");
+ return false;
}
}
- return false;
+
+ return true;
}
/* fix bucket parameters */
@@ -318,7 +350,7 @@ static void throttle_fix_bucket(LeakyBucket *bkt)
double min;
/* zero bucket level */
- bkt->level = 0;
+ bkt->level = bkt->burst_level = 0;
/* The following is done to cope with the Linux CFQ block scheduler
* which regroup reads and writes by block of 100ms in the guest.
@@ -421,22 +453,36 @@ bool throttle_schedule_timer(ThrottleState *ts,
*/
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
{
+ const BucketType bucket_types_size[2][2] = {
+ { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
+ { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
+ };
+ const BucketType bucket_types_units[2][2] = {
+ { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
+ { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
+ };
double units = 1.0;
+ unsigned i;
/* if cfg.op_size is defined and smaller than size we compute unit count */
if (ts->cfg.op_size && size > ts->cfg.op_size) {
units = (double) size / ts->cfg.op_size;
}
- ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
- ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+ for (i = 0; i < 2; i++) {
+ LeakyBucket *bkt;
- if (is_write) {
- ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
- ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
- } else {
- ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
- ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
+ bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
+ bkt->level += size;
+ if (bkt->burst_length > 1) {
+ bkt->burst_level += size;
+ }
+
+ bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
+ bkt->level += units;
+ if (bkt->burst_length > 1) {
+ bkt->burst_level += units;
+ }
}
}