block-migration: add lock
Some state is shared between the block migration code and its AIO
callbacks. Once block migration runs outside the iothread,
the block migration code and the AIO callbacks will be able to
run concurrently. Protect the critical sections with a separate
lock. Do the same for completed_sectors, which can be used from
the monitor.
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
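
Note: the subtle part is the ordering rule recorded in the new comment
("Lock must be taken _inside_ the iothread lock"): a thread that needs
both locks must always take the iothread lock first, while an AIO
callback that may run without the iothread lock only ever takes the
inner lock. The following standalone sketch illustrates that discipline
with plain pthreads; it is not QEMU code, and all names in it are made
up for illustration:

    /*
     * Standalone illustration (not QEMU code) of the lock ordering
     * described above: the fine-grained migration lock must always be
     * taken inside the iothread lock, never the other way around.
     * Plain pthreads stand in for QemuMutex; all names are made up.
     */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t iothread_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t migration_lock = PTHREAD_MUTEX_INITIALIZER;

    static int submitted;                /* protected by migration_lock */

    /* Migration-thread path: needs both locks, outer one first. */
    static void submit_request(void)
    {
        pthread_mutex_lock(&iothread_lock);
        pthread_mutex_lock(&migration_lock);   /* inner lock second */
        submitted++;
        pthread_mutex_unlock(&migration_lock); /* drop before blocking work */
        /* ... issue the actual I/O here, without the inner lock ... */
        pthread_mutex_unlock(&iothread_lock);
    }

    /* AIO-callback path: may run without the iothread lock, so it takes
     * only the inner lock and cannot invert the order used above. */
    static void read_completed(void)
    {
        pthread_mutex_lock(&migration_lock);
        submitted--;
        pthread_mutex_unlock(&migration_lock);
    }

    int main(void)
    {
        submit_request();
        read_completed();
        printf("in flight: %d\n", submitted);
        return 0;
    }
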
diff --git a/block-migration.c b/block-migration.c
index d62a8b8..b726c6c 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -54,7 +54,7 @@
int64_t cur_sector;
int64_t cur_dirty;
- /* Protected by iothread lock. */
+ /* Protected by block migration lock. */
unsigned long *aio_bitmap;
int64_t completed_sectors;
} BlkMigDevState;
@@ -69,7 +69,7 @@
QEMUIOVector qiov;
BlockDriverAIOCB *aiocb;
- /* Protected by iothread lock. */
+ /* Protected by block migration lock. */
int ret;
QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;
@@ -81,7 +81,7 @@
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
- /* Protected by iothread lock. */
+ /* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
int submitted;
int read_done;
@@ -90,10 +90,23 @@
int transferred;
int prev_progress;
int bulk_completed;
+
+ /* Lock must be taken _inside_ the iothread lock. */
+ QemuMutex lock;
} BlkMigState;
static BlkMigState block_mig_state;
+static void blk_mig_lock(void)
+{
+ qemu_mutex_lock(&block_mig_state.lock);
+}
+
+static void blk_mig_unlock(void)
+{
+ qemu_mutex_unlock(&block_mig_state.lock);
+}
+
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
@@ -120,9 +133,11 @@
BlkMigDevState *bmds;
uint64_t sum = 0;
+ blk_mig_lock();
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
sum += bmds->completed_sectors;
}
+ blk_mig_unlock();
return sum << BDRV_SECTOR_BITS;
}
@@ -142,6 +157,9 @@
return sum << BDRV_SECTOR_BITS;
}
+
+/* Called with migration lock held. */
+
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -154,6 +172,8 @@
}
}
+/* Called with migration lock held. */
+
static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
int nb_sectors, int set)
{
@@ -188,10 +208,13 @@
bmds->aio_bitmap = g_malloc0(bitmap_size);
}
+/* Never hold migration lock when yielding to the main loop! */
+
static void blk_mig_read_cb(void *opaque, int ret)
{
BlkMigBlock *blk = opaque;
+ blk_mig_lock();
blk->ret = ret;
QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
@@ -200,6 +223,7 @@
block_mig_state.submitted--;
block_mig_state.read_done++;
assert(block_mig_state.submitted >= 0);
+ blk_mig_unlock();
}
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
@@ -244,7 +268,9 @@
blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+ blk_mig_lock();
block_mig_state.submitted++;
+ blk_mig_unlock();
blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
@@ -366,8 +392,12 @@
int ret = -EIO;
for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+ blk_mig_lock();
if (bmds_aio_inflight(bmds, sector)) {
+ blk_mig_unlock();
bdrv_drain_all();
+ } else {
+ blk_mig_unlock();
}
if (bdrv_get_dirty(bmds->bs, sector)) {
@@ -389,8 +419,11 @@
blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
+
+ blk_mig_lock();
block_mig_state.submitted++;
bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
+ blk_mig_unlock();
} else {
ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
if (ret < 0) {
@@ -446,6 +479,7 @@
__FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
block_mig_state.transferred);
+ blk_mig_lock();
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
if (qemu_file_rate_limit(f)) {
break;
@@ -456,7 +490,9 @@
}
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
+ blk_mig_unlock();
blk_send(f, blk);
+ blk_mig_lock();
g_free(blk->buf);
g_free(blk);
@@ -465,6 +501,7 @@
block_mig_state.transferred++;
assert(block_mig_state.read_done >= 0);
}
+ blk_mig_unlock();
DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
block_mig_state.submitted, block_mig_state.read_done,
@@ -493,6 +530,7 @@
set_dirty_tracking(0);
+ blk_mig_lock();
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_set_in_use(bmds->bs, 0);
@@ -506,6 +544,7 @@
g_free(blk->buf);
g_free(blk);
}
+ blk_mig_unlock();
}
static void block_migration_cancel(void *opaque)
@@ -548,9 +587,11 @@
blk_mig_reset_dirty_cursor();
/* control the rate of transfer */
+ blk_mig_lock();
while ((block_mig_state.submitted +
block_mig_state.read_done) * BLOCK_SIZE <
qemu_file_get_rate_limit(f)) {
+ blk_mig_unlock();
if (block_mig_state.bulk_completed == 0) {
/* first finish the bulk phase */
if (blk_mig_save_bulked_block(f) == 0) {
@@ -564,11 +605,13 @@
if (ret < 0) {
return ret;
}
+ blk_mig_lock();
if (ret != 0) {
/* no more dirty blocks */
break;
}
}
+ blk_mig_unlock();
ret = flush_blks(f);
if (ret) {
@@ -595,7 +638,9 @@
/* we know for sure that save bulk is completed and
all async read completed */
+ blk_mig_lock();
assert(block_mig_state.submitted == 0);
+ blk_mig_unlock();
do {
ret = blk_mig_save_dirty_block(f, 0);
@@ -620,6 +665,7 @@
/* Estimate pending number of bytes to send */
uint64_t pending;
+ blk_mig_lock();
pending = get_remaining_dirty() +
block_mig_state.submitted * BLOCK_SIZE +
block_mig_state.read_done * BLOCK_SIZE;
@@ -628,6 +674,7 @@
if (pending == 0 && !block_mig_state.bulk_completed) {
pending = BLOCK_SIZE;
}
+ blk_mig_unlock();
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
return pending;
@@ -739,6 +786,7 @@
{
QSIMPLEQ_INIT(&block_mig_state.bmds_list);
QSIMPLEQ_INIT(&block_mig_state.blk_list);
+ qemu_mutex_init(&block_mig_state.lock);
register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
&block_mig_state);
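
A detail worth calling out in the flush_blks() hunk above: the lock is
dropped around blk_send() and reacquired afterwards, because blk_send()
writes to the migration stream and may block, and stalling the AIO
callbacks on the inner lock for the duration of a blocking write would
defeat the point of the finer-grained lock. A second standalone sketch
of that drop-and-reacquire pattern, again with pthreads and made-up
names rather than QEMU code:

    /*
     * Sketch of the drop-and-reacquire pattern used in flush_blks().
     * The list is only touched under the lock, but the blocking send
     * happens with the lock released, so callbacks can keep appending
     * to the list meanwhile.  Names are illustrative only.
     */
    #include <pthread.h>
    #include <stdio.h>

    typedef struct Blk {
        struct Blk *next;
        int payload;
    } Blk;

    static pthread_mutex_t migration_lock = PTHREAD_MUTEX_INITIALIZER;
    static Blk *pending;                 /* protected by migration_lock */

    static void send_blk(Blk *blk)       /* may block; called unlocked */
    {
        printf("sent %d\n", blk->payload);
    }

    static void flush_pending(void)
    {
        pthread_mutex_lock(&migration_lock);
        while (pending != NULL) {
            Blk *blk = pending;
            pending = blk->next;
            pthread_mutex_unlock(&migration_lock); /* drop around blocking call */
            send_blk(blk);
            pthread_mutex_lock(&migration_lock);   /* reacquire; re-check list */
        }
        pthread_mutex_unlock(&migration_lock);
    }

    int main(void)
    {
        static Blk b2 = { NULL, 2 }, b1 = { &b2, 1 };
        pending = &b1;
        flush_pending();
        return 0;
    }
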