aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-10-14 16:09:52 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-10-14 16:09:52 +0100
commitc760cb77e511eb05094df67c1b30029a952efa35 (patch)
treed57ed37e184c39091bc94f67a760e417b0fc2d3b
parent22dbfdecc3c52228d3489da3fe81da92b21197bf (diff)
parent9a85e4b8f672016adbf7b7d5beaab2a99b9b5615 (diff)
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20191011a' into staging
Migration pull 2019-10-11 Mostly cleanups and minor fixes [Note I'm seeing a hang on the aarch64 hosted x86-64 tcg migration test in xbzrle; but I'm seeing that on current head as well] # gpg: Signature made Fri 11 Oct 2019 20:14:31 BST # gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full] # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20191011a: (21 commits) migration: Support gtree migration migration/multifd: pages->used would be cleared when attach to multifd_send_state migration/multifd: initialize packet->magic/version once at setup stage migration/multifd: use pages->allocated instead of the static max migration/multifd: fix a typo in comment of multifd_recv_unfill_packet() migration/postcopy: check PostcopyState before setting to POSTCOPY_INCOMING_RUNNING migration/postcopy: rename postcopy_ram_enable_notify to postcopy_ram_incoming_setup migration/postcopy: postpone setting PostcopyState to END migration/postcopy: mis->have_listen_thread check will never be touched migration: report SaveStateEntry id and name on failure migration: pass in_postcopy instead of check state again migration/postcopy: fix typo in mark_postcopy_blocktime_begin's comment migration/postcopy: map large zero page in postcopy_ram_incoming_setup() migration/postcopy: allocate tmp_page in setup stage migration: Don't try and recover return path in non-postcopy rcu: Use automatic rc_read unlock in core memory/exec code migration: Use automatic rcu_read unlock in rdma.c migration: Use automatic rcu_read unlock in ram.c migration: Fix missing rcu_read_unlock rcu: Add automatically released rcu_read_lock variants ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--docs/devel/rcu.txt16
-rw-r--r--exec.c116
-rw-r--r--include/exec/ram_addr.h138
-rw-r--r--include/migration/misc.h1
-rw-r--r--include/migration/vmstate.h40
-rw-r--r--include/qemu/rcu.h25
-rw-r--r--memory.c15
-rw-r--r--migration/migration.c17
-rw-r--r--migration/postcopy-ram.c88
-rw-r--r--migration/postcopy-ram.h9
-rw-r--r--migration/ram.c298
-rw-r--r--migration/rdma.c57
-rw-r--r--migration/savevm.c14
-rw-r--r--migration/trace-events5
-rw-r--r--migration/vmstate-types.c152
-rw-r--r--tests/test-vmstate.c421
16 files changed, 980 insertions, 432 deletions
diff --git a/docs/devel/rcu.txt b/docs/devel/rcu.txt
index c84e7f42b2..d83fed2f79 100644
--- a/docs/devel/rcu.txt
+++ b/docs/devel/rcu.txt
@@ -187,6 +187,22 @@ The following APIs must be used before RCU is used in a thread:
Note that these APIs are relatively heavyweight, and should _not_ be
nested.
+Convenience macros
+==================
+
+Two macros are provided that automatically release the read lock at the
+end of the scope.
+
+ RCU_READ_LOCK_GUARD()
+
+ Takes the lock and will release it at the end of the block it's
+ used in.
+
+ WITH_RCU_READ_LOCK_GUARD() { code }
+
+ Is used at the head of a block to protect the code within the block.
+
+Note that 'goto'ing out of the guarded block will also drop the lock.
DIFFERENCES WITH LINUX
======================
diff --git a/exec.c b/exec.c
index bdcfcdff3f..fb0943cfed 100644
--- a/exec.c
+++ b/exec.c
@@ -1037,16 +1037,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
return;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
mr = address_space_translate(as, addr, &addr, &l, false, attrs);
if (!(memory_region_is_ram(mr)
|| memory_region_is_romd(mr))) {
- rcu_read_unlock();
return;
}
ram_addr = memory_region_get_ram_addr(mr) + addr;
tb_invalidate_phys_page_range(ram_addr, ram_addr + 1);
- rcu_read_unlock();
}
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
@@ -1332,14 +1330,13 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
end = TARGET_PAGE_ALIGN(start + length);
start &= TARGET_PAGE_MASK;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
block = qemu_get_ram_block(start);
assert(block == qemu_get_ram_block(end - 1));
start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
CPU_FOREACH(cpu) {
tlb_reset_dirty(cpu, start1, length);
}
- rcu_read_unlock();
}
/* Note: start and end must be within the same ram block. */
@@ -1360,30 +1357,29 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
- rcu_read_lock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+ ramblock = qemu_get_ram_block(start);
+ /* Range sanity check on the ramblock */
+ assert(start >= ramblock->offset &&
+ start + length <= ramblock->offset + ramblock->used_length);
- blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
- ramblock = qemu_get_ram_block(start);
- /* Range sanity check on the ramblock */
- assert(start >= ramblock->offset &&
- start + length <= ramblock->offset + ramblock->used_length);
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page,
+ DIRTY_MEMORY_BLOCK_SIZE - offset);
- while (page < end) {
- unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
- unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
- unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+ dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
+ offset, num);
+ page += num;
+ }
- dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
- offset, num);
- page += num;
+ mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
+ mr_size = (end - page) << TARGET_PAGE_BITS;
+ memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
}
- mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
- mr_size = (end - page) << TARGET_PAGE_BITS;
- memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
-
- rcu_read_unlock();
-
if (dirty && tcg_enabled()) {
tlb_reset_dirty_range_all(start, length);
}
@@ -1411,28 +1407,27 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
end = last >> TARGET_PAGE_BITS;
dest = 0;
- rcu_read_lock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
- blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page,
+ DIRTY_MEMORY_BLOCK_SIZE - offset);
- while (page < end) {
- unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
- unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
- unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
-
- assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
- assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
- offset >>= BITS_PER_LEVEL;
+ assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+ assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
+ offset >>= BITS_PER_LEVEL;
- bitmap_copy_and_clear_atomic(snap->dirty + dest,
- blocks->blocks[idx] + offset,
- num);
- page += num;
- dest += num >> BITS_PER_LEVEL;
+ bitmap_copy_and_clear_atomic(snap->dirty + dest,
+ blocks->blocks[idx] + offset,
+ num);
+ page += num;
+ dest += num >> BITS_PER_LEVEL;
+ }
}
- rcu_read_unlock();
-
if (tcg_enabled()) {
tlb_reset_dirty_range_all(start, length);
}
@@ -1643,7 +1638,7 @@ void ram_block_dump(Monitor *mon)
RAMBlock *block;
char *psize;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
monitor_printf(mon, "%24s %8s %18s %18s %18s\n",
"Block Name", "PSize", "Offset", "Used", "Total");
RAMBLOCK_FOREACH(block) {
@@ -1655,7 +1650,6 @@ void ram_block_dump(Monitor *mon)
(uint64_t)block->max_length);
g_free(psize);
}
- rcu_read_unlock();
}
#ifdef __linux__
@@ -2009,11 +2003,10 @@ static unsigned long last_ram_page(void)
RAMBlock *block;
ram_addr_t last = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
RAMBLOCK_FOREACH(block) {
last = MAX(last, block->offset + block->max_length);
}
- rcu_read_unlock();
return last >> TARGET_PAGE_BITS;
}
@@ -2100,7 +2093,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
}
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
RAMBLOCK_FOREACH(block) {
if (block != new_block &&
!strcmp(block->idstr, new_block->idstr)) {
@@ -2109,7 +2102,6 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
abort();
}
}
- rcu_read_unlock();
}
/* Called with iothread lock held. */
@@ -2651,17 +2643,16 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
if (xen_enabled()) {
ram_addr_t ram_addr;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
ram_addr = xen_ram_addr_from_mapcache(ptr);
block = qemu_get_ram_block(ram_addr);
if (block) {
*offset = ram_addr - block->offset;
}
- rcu_read_unlock();
return block;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
block = atomic_rcu_read(&ram_list.mru_block);
if (block && block->host && host - block->host < block->max_length) {
goto found;
@@ -2677,7 +2668,6 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
}
}
- rcu_read_unlock();
return NULL;
found:
@@ -2685,7 +2675,6 @@ found:
if (round_offset) {
*offset &= TARGET_PAGE_MASK;
}
- rcu_read_unlock();
return block;
}
@@ -3281,10 +3270,9 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
FlatView *fv;
if (len > 0) {
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
fv = address_space_to_flatview(as);
result = flatview_read(fv, addr, attrs, buf, len);
- rcu_read_unlock();
}
return result;
@@ -3298,10 +3286,9 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
FlatView *fv;
if (len > 0) {
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
fv = address_space_to_flatview(as);
result = flatview_write(fv, addr, attrs, buf, len);
- rcu_read_unlock();
}
return result;
@@ -3341,7 +3328,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
hwaddr addr1;
MemoryRegion *mr;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
while (len > 0) {
l = len;
mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
@@ -3366,7 +3353,6 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
buf += l;
addr += l;
}
- rcu_read_unlock();
return MEMTX_OK;
}
@@ -3511,10 +3497,9 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr,
FlatView *fv;
bool result;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
fv = address_space_to_flatview(as);
result = flatview_access_valid(fv, addr, len, is_write, attrs);
- rcu_read_unlock();
return result;
}
@@ -3569,13 +3554,12 @@ void *address_space_map(AddressSpace *as,
}
l = len;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
fv = address_space_to_flatview(as);
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
if (!memory_access_is_direct(mr, is_write)) {
if (atomic_xchg(&bounce.in_use, true)) {
- rcu_read_unlock();
return NULL;
}
/* Avoid unbounded allocations */
@@ -3591,7 +3575,6 @@ void *address_space_map(AddressSpace *as,
bounce.buffer, l);
}
- rcu_read_unlock();
*plen = l;
return bounce.buffer;
}
@@ -3601,7 +3584,6 @@ void *address_space_map(AddressSpace *as,
*plen = flatview_extend_translation(fv, addr, len, mr, xlat,
l, is_write, attrs);
ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
- rcu_read_unlock();
return ptr;
}
@@ -3869,13 +3851,12 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
hwaddr l = 1;
bool res;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
mr = address_space_translate(&address_space_memory,
phys_addr, &phys_addr, &l, false,
MEMTXATTRS_UNSPECIFIED);
res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
- rcu_read_unlock();
return res;
}
@@ -3884,14 +3865,13 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
RAMBlock *block;
int ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
RAMBLOCK_FOREACH(block) {
ret = func(block, opaque);
if (ret) {
break;
}
}
- rcu_read_unlock();
return ret;
}
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index e96e621de5..ad158bb247 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -193,30 +193,29 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
- rcu_read_lock();
-
- blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+ WITH_RCU_READ_LOCK_GUARD() {
+ blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+ idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ base = page - offset;
+ while (page < end) {
+ unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+ unsigned long num = next - base;
+ unsigned long found = find_next_bit(blocks->blocks[idx],
+ num, offset);
+ if (found < num) {
+ dirty = true;
+ break;
+ }
- idx = page / DIRTY_MEMORY_BLOCK_SIZE;
- offset = page % DIRTY_MEMORY_BLOCK_SIZE;
- base = page - offset;
- while (page < end) {
- unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
- unsigned long num = next - base;
- unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
- if (found < num) {
- dirty = true;
- break;
+ page = next;
+ idx++;
+ offset = 0;
+ base += DIRTY_MEMORY_BLOCK_SIZE;
}
-
- page = next;
- idx++;
- offset = 0;
- base += DIRTY_MEMORY_BLOCK_SIZE;
}
- rcu_read_unlock();
-
return dirty;
}
@@ -234,7 +233,7 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
@@ -256,8 +255,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
base += DIRTY_MEMORY_BLOCK_SIZE;
}
- rcu_read_unlock();
-
return dirty;
}
@@ -309,13 +306,11 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
idx = page / DIRTY_MEMORY_BLOCK_SIZE;
offset = page % DIRTY_MEMORY_BLOCK_SIZE;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
set_bit_atomic(offset, blocks->blocks[idx]);
-
- rcu_read_unlock();
}
static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
@@ -334,39 +329,37 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
- rcu_read_lock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+ blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
+ }
- for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
- blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
- }
+ idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ base = page - offset;
+ while (page < end) {
+ unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
- idx = page / DIRTY_MEMORY_BLOCK_SIZE;
- offset = page % DIRTY_MEMORY_BLOCK_SIZE;
- base = page - offset;
- while (page < end) {
- unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+ if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
+ bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
+ offset, next - page);
+ }
+ if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
+ bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
+ offset, next - page);
+ }
+ if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
+ bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
+ offset, next - page);
+ }
- if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
- bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
- offset, next - page);
+ page = next;
+ idx++;
+ offset = 0;
+ base += DIRTY_MEMORY_BLOCK_SIZE;
}
- if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
- bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
- offset, next - page);
- }
- if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
- bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
- offset, next - page);
- }
-
- page = next;
- idx++;
- offset = 0;
- base += DIRTY_MEMORY_BLOCK_SIZE;
}
- rcu_read_unlock();
-
xen_hvm_modified_memory(start, length);
}
@@ -396,36 +389,35 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
DIRTY_MEMORY_BLOCK_SIZE);
- rcu_read_lock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+ blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
+ }
- for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
- blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
- }
+ for (k = 0; k < nr; k++) {
+ if (bitmap[k]) {
+ unsigned long temp = leul_to_cpu(bitmap[k]);
- for (k = 0; k < nr; k++) {
- if (bitmap[k]) {
- unsigned long temp = leul_to_cpu(bitmap[k]);
+ atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
- atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
+ if (global_dirty_log) {
+ atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
+ temp);
+ }
- if (global_dirty_log) {
- atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
- temp);
+ if (tcg_enabled()) {
+ atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
+ temp);
+ }
}
- if (tcg_enabled()) {
- atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
+ if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
+ offset = 0;
+ idx++;
}
}
-
- if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
- offset = 0;
- idx++;
- }
}
- rcu_read_unlock();
-
xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
} else {
uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
diff --git a/include/migration/misc.h b/include/migration/misc.h
index b9d8e787af..d2762257aa 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -60,6 +60,7 @@ void migration_object_init(void);
void migration_shutdown(void);
void qemu_start_incoming_migration(const char *uri, Error **errp);
bool migration_is_idle(void);
+bool migration_is_active(MigrationState *);
void add_migration_state_change_notifier(Notifier *notify);
void remove_migration_state_change_notifier(Notifier *notify);
bool migration_in_setup(MigrationState *);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1fbfd099dd..b9ee563aa4 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -224,6 +224,7 @@ extern const VMStateInfo vmstate_info_unused_buffer;
extern const VMStateInfo vmstate_info_tmp;
extern const VMStateInfo vmstate_info_bitmap;
extern const VMStateInfo vmstate_info_qtailq;
+extern const VMStateInfo vmstate_info_gtree;
#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
/*
@@ -754,6 +755,45 @@ extern const VMStateInfo vmstate_info_qtailq;
.start = offsetof(_type, _next), \
}
+/*
+ * For migrating a GTree whose key is a pointer to _key_type and the
+ * value, a pointer to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: Start address of the 2 element array containing the data vmsd
+ * and the key vmsd, in that order
+ * _key_type: type of the key
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_V(_field, _state, _version, _vmsd, \
+ _key_type, _val_type) \
+{ \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .vmsd = (_vmsd), \
+ .info = &vmstate_info_gtree, \
+ .start = sizeof(_key_type), \
+ .size = sizeof(_val_type), \
+ .offset = offsetof(_state, _field), \
+}
+
+/*
+ * For migrating a GTree with direct key and the value a pointer
+ * to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: data vmsd
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_DIRECT_KEY_V(_field, _state, _version, _vmsd, _val_type) \
+{ \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .vmsd = (_vmsd), \
+ .info = &vmstate_info_gtree, \
+ .start = 0, \
+ .size = sizeof(_val_type), \
+ .offset = offsetof(_state, _field), \
+}
+
/* _f : field name
_f_n : num of elements field_name
_n : num of elements
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 22876d1428..9c82683e37 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -154,6 +154,31 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
}), \
(RCUCBFunc *)g_free);
+typedef void RCUReadAuto;
+static inline RCUReadAuto *rcu_read_auto_lock(void)
+{
+ rcu_read_lock();
+ /* Anything non-NULL causes the cleanup function to be called */
+ return (void *)(uintptr_t)0x1;
+}
+
+static inline void rcu_read_auto_unlock(RCUReadAuto *r)
+{
+ rcu_read_unlock();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
+
+#define WITH_RCU_READ_LOCK_GUARD() \
+ WITH_RCU_READ_LOCK_GUARD_(_rcu_read_auto##__COUNTER__)
+
+#define WITH_RCU_READ_LOCK_GUARD_(var) \
+ for (g_autoptr(RCUReadAuto) var = rcu_read_auto_lock(); \
+ (var); rcu_read_auto_unlock(var), (var) = NULL)
+
+#define RCU_READ_LOCK_GUARD() \
+ g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
+
#ifdef __cplusplus
}
#endif
diff --git a/memory.c b/memory.c
index 978da3d3df..c952eabb5d 100644
--- a/memory.c
+++ b/memory.c
@@ -779,14 +779,13 @@ FlatView *address_space_get_flatview(AddressSpace *as)
{
FlatView *view;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
do {
view = address_space_to_flatview(as);
/* If somebody has replaced as->current_map concurrently,
* flatview_ref returns false.
*/
} while (!flatview_ref(view));
- rcu_read_unlock();
return view;
}
@@ -2166,12 +2165,11 @@ int memory_region_get_fd(MemoryRegion *mr)
{
int fd;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
while (mr->alias) {
mr = mr->alias;
}
fd = mr->ram_block->fd;
- rcu_read_unlock();
return fd;
}
@@ -2181,14 +2179,13 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr)
void *ptr;
uint64_t offset = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
while (mr->alias) {
offset += mr->alias_offset;
mr = mr->alias;
}
assert(mr->ram_block);
ptr = qemu_map_ram_ptr(mr->ram_block, offset);
- rcu_read_unlock();
return ptr;
}
@@ -2578,12 +2575,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size)
{
MemoryRegionSection ret;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
ret = memory_region_find_rcu(mr, addr, size);
if (ret.mr) {
memory_region_ref(ret.mr);
}
- rcu_read_unlock();
return ret;
}
@@ -2591,9 +2587,8 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr)
{
MemoryRegion *mr;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
mr = memory_region_find_rcu(container, addr, 1).mr;
- rcu_read_unlock();
return mr && mr != container;
}
diff --git a/migration/migration.c b/migration/migration.c
index 5f7e4d15e9..3febd0f8f3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1533,8 +1533,7 @@ static void migrate_fd_cleanup(MigrationState *s)
qemu_fclose(tmp);
}
- assert((s->state != MIGRATION_STATUS_ACTIVE) &&
- (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
+ assert(!migration_is_active(s));
if (s->state == MIGRATION_STATUS_CANCELLING) {
migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
@@ -1703,6 +1702,12 @@ bool migration_is_idle(void)
return false;
}
+bool migration_is_active(MigrationState *s)
+{
+ return (s->state == MIGRATION_STATUS_ACTIVE ||
+ s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+}
+
void migrate_init(MigrationState *s)
{
/*
@@ -2481,7 +2486,7 @@ retry:
out:
res = qemu_file_get_error(rp);
if (res) {
- if (res == -EIO) {
+ if (res == -EIO && migration_in_postcopy()) {
/*
* Maybe there is something we can do: it looks like a
* network down issue, and we pause for a recovery.
@@ -3144,8 +3149,7 @@ static MigIterateState migration_iteration_run(MigrationState *s)
return MIG_ITERATE_SKIP;
}
/* Just another iteration step */
- qemu_savevm_state_iterate(s->to_dst_file,
- s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+ qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
} else {
trace_migration_thread_low_pending(pending_size);
migration_completion(s);
@@ -3266,8 +3270,7 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete();
- while (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ while (migration_is_active(s)) {
int64_t current_time;
if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 1f63e65ed7..abccafc8c8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -577,8 +577,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
}
}
- postcopy_state_set(POSTCOPY_INCOMING_END);
-
if (mis->postcopy_tmp_page) {
munmap(mis->postcopy_tmp_page, mis->largest_page_size);
mis->postcopy_tmp_page = NULL;
@@ -768,9 +766,11 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
atomic_xchg(&dc->vcpu_addr[cpu], addr);
- /* check it here, not at the begining of the function,
- * due to, check could accur early than bitmap_set in
- * qemu_ufd_copy_ioctl */
+ /*
+ * check it here, not at the beginning of the function,
+ * due to, check could occur early than bitmap_set in
+ * qemu_ufd_copy_ioctl
+ */
already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
if (already_received) {
atomic_xchg(&dc->vcpu_addr[cpu], 0);
@@ -1094,7 +1094,7 @@ retry:
return NULL;
}
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
/* Open the fd for the kernel to give us userfaults */
mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -1134,6 +1134,32 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
return -1;
}
+ mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (mis->postcopy_tmp_page == MAP_FAILED) {
+ mis->postcopy_tmp_page = NULL;
+ error_report("%s: Failed to map postcopy_tmp_page %s",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ /*
+ * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages
+ */
+ mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+ int e = errno;
+ mis->postcopy_tmp_zero_page = NULL;
+ error_report("%s: Failed to map large zero page %s",
+ __func__, strerror(e));
+ return -e;
+ }
+ memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+
/*
* Ballooning can mark pages as absent while we're postcopying
* that would cause false userfaults.
@@ -1240,50 +1266,10 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
qemu_ram_block_host_offset(rb,
host));
} else {
- /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
- if (!mis->postcopy_tmp_zero_page) {
- mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1, 0);
- if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
- int e = errno;
- mis->postcopy_tmp_zero_page = NULL;
- error_report("%s: %s mapping large zero page",
- __func__, strerror(e));
- return -e;
- }
- memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
- }
- return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
- rb);
+ return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb);
}
}
-/*
- * Returns a target page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * The same address is used repeatedly, postcopy_place_page just takes the
- * backing page away.
- * Returns: Pointer to allocated page
- *
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
- if (!mis->postcopy_tmp_page) {
- mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
- PROT_READ | PROT_WRITE, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (mis->postcopy_tmp_page == MAP_FAILED) {
- mis->postcopy_tmp_page = NULL;
- error_report("%s: %s", __func__, strerror(errno));
- return NULL;
- }
- }
-
- return mis->postcopy_tmp_page;
-}
-
#else
/* No target OS support, stubs just fail */
void fill_destination_postcopy_migration_info(MigrationInfo *info)
@@ -1321,7 +1307,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
return -1;
}
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
assert(0);
return -1;
@@ -1341,12 +1327,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
return -1;
}
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
- assert(0);
- return NULL;
-}
-
int postcopy_wake_shared(struct PostCopyFD *pcfd,
uint64_t client_addr,
RAMBlock *rb)
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 9c8bd2bae0..9941feb63a 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
* Make all of RAM sensitive to accesses to areas that haven't yet been written
* and wire up anything necessary to deal with it.
*/
-int postcopy_ram_enable_notify(MigrationIncomingState *mis);
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis);
/*
* Initialise postcopy-ram, setting the RAM to a state where we can go into
@@ -100,13 +100,6 @@ typedef enum {
POSTCOPY_INCOMING_END
} PostcopyState;
-/*
- * Allocate a page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * Returns: Pointer to allocated page
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis);
-
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
diff --git a/migration/ram.c b/migration/ram.c
index 22423f08cd..5078f94490 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -181,14 +181,14 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
RAMBlock *block;
int ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
ret = func(block, opaque);
if (ret) {
break;
}
}
- rcu_read_unlock();
return ret;
}
@@ -791,13 +791,10 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
static void multifd_send_fill_packet(MultiFDSendParams *p)
{
MultiFDPacket_t *packet = p->packet;
- uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
int i;
- packet->magic = cpu_to_be32(MULTIFD_MAGIC);
- packet->version = cpu_to_be32(MULTIFD_VERSION);
packet->flags = cpu_to_be32(p->flags);
- packet->pages_alloc = cpu_to_be32(page_max);
+ packet->pages_alloc = cpu_to_be32(p->pages->allocated);
packet->pages_used = cpu_to_be32(p->pages->used);
packet->next_packet_size = cpu_to_be32(p->next_packet_size);
packet->packet_num = cpu_to_be64(p->packet_num);
@@ -838,7 +835,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
/*
- * If we recevied a packet that is 100 times bigger than expected
+ * If we received a packet that is 100 times bigger than expected
* just stop migration. It is a magic number.
*/
if (packet->pages_alloc > pages_max * 100) {
@@ -1132,7 +1129,6 @@ static void *multifd_send_thread(void *opaque)
p->flags = 0;
p->num_packets++;
p->num_pages += used;
- p->pages->used = 0;
qemu_mutex_unlock(&p->mutex);
trace_multifd_send(p->id, packet_num, used, flags,
@@ -1241,6 +1237,8 @@ int multifd_save_setup(void)
p->packet_len = sizeof(MultiFDPacket_t)
+ sizeof(ram_addr_t) * page_count;
p->packet = g_malloc0(p->packet_len);
+ p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
+ p->packet->version = cpu_to_be32(MULTIFD_VERSION);
p->name = g_strdup_printf("multifdsend_%d", i);
socket_send_channel_create(multifd_new_send_channel_async, p);
}
@@ -1848,12 +1846,12 @@ static void migration_bitmap_sync(RAMState *rs)
memory_global_dirty_log_sync();
qemu_mutex_lock(&rs->bitmap_mutex);
- rcu_read_lock();
- RAMBLOCK_FOREACH_NOT_IGNORED(block) {
- ramblock_sync_dirty_bitmap(rs, block);
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ ramblock_sync_dirty_bitmap(rs, block);
+ }
+ ram_counters.remaining = ram_bytes_remaining();
}
- ram_counters.remaining = ram_bytes_remaining();
- rcu_read_unlock();
qemu_mutex_unlock(&rs->bitmap_mutex);
memory_global_after_dirty_log_sync();
@@ -2397,13 +2395,12 @@ static void migration_page_queue_free(RAMState *rs)
/* This queue generally should be empty - but in the case of a failed
* migration might have some droppings in.
*/
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
memory_region_unref(mspr->rb->mr);
QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
g_free(mspr);
}
- rcu_read_unlock();
}
/**
@@ -2424,7 +2421,8 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
RAMState *rs = ram_state;
ram_counters.postcopy_requests++;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
if (!rbname) {
/* Reuse last RAMBlock */
ramblock = rs->last_req_rb;
@@ -2466,12 +2464,10 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
migration_make_urgent_request();
qemu_mutex_unlock(&rs->src_page_req_mutex);
- rcu_read_unlock();
return 0;
err:
- rcu_read_unlock();
return -1;
}
@@ -2700,7 +2696,8 @@ static uint64_t ram_bytes_total_common(bool count_ignored)
RAMBlock *block;
uint64_t total = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
if (count_ignored) {
RAMBLOCK_FOREACH_MIGRATABLE(block) {
total += block->used_length;
@@ -2710,7 +2707,6 @@ static uint64_t ram_bytes_total_common(bool count_ignored)
total += block->used_length;
}
}
- rcu_read_unlock();
return total;
}
@@ -3034,7 +3030,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
RAMBlock *block;
int ret;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
/* This should be our last sync, the src is now paused */
migration_bitmap_sync(rs);
@@ -3048,7 +3044,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
/* Deal with TPS != HPS and huge pages */
ret = postcopy_chunk_hostpages(ms, block);
if (ret) {
- rcu_read_unlock();
return ret;
}
@@ -3060,7 +3055,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
trace_ram_postcopy_send_discard_bitmap();
ret = postcopy_each_ram_send_discard(ms);
- rcu_read_unlock();
return ret;
}
@@ -3081,7 +3075,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
trace_ram_discard_range(rbname, start, length);
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
RAMBlock *rb = qemu_ram_block_by_name(rbname);
if (!rb) {
@@ -3101,8 +3095,6 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
ret = ram_block_discard_range(rb, start, length);
err:
- rcu_read_unlock();
-
return ret;
}
@@ -3231,13 +3223,12 @@ static void ram_init_bitmaps(RAMState *rs)
/* For memory_global_dirty_log_start below. */
qemu_mutex_lock_iothread();
qemu_mutex_lock_ramlist();
- rcu_read_lock();
-
- ram_list_init_bitmaps();
- memory_global_dirty_log_start();
- migration_bitmap_sync_precopy(rs);
- rcu_read_unlock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ ram_list_init_bitmaps();
+ memory_global_dirty_log_start();
+ migration_bitmap_sync_precopy(rs);
+ }
qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
}
@@ -3373,24 +3364,23 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
}
(*rsp)->f = f;
- rcu_read_lock();
-
- qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+ WITH_RCU_READ_LOCK_GUARD() {
+ qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
- qemu_put_be64(f, block->used_length);
- if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
- qemu_put_be64(f, block->page_size);
- }
- if (migrate_ignore_shared()) {
- qemu_put_be64(f, block->mr->addr);
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ qemu_put_byte(f, strlen(block->idstr));
+ qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+ qemu_put_be64(f, block->used_length);
+ if (migrate_postcopy_ram() && block->page_size !=
+ qemu_host_page_size) {
+ qemu_put_be64(f, block->page_size);
+ }
+ if (migrate_ignore_shared()) {
+ qemu_put_be64(f, block->mr->addr);
+ }
}
}
- rcu_read_unlock();
-
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
@@ -3425,55 +3415,57 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
goto out;
}
- rcu_read_lock();
- if (ram_list.version != rs->last_version) {
- ram_state_reset(rs);
- }
-
- /* Read version before ram_list.blocks */
- smp_rmb();
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (ram_list.version != rs->last_version) {
+ ram_state_reset(rs);
+ }
- ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+ /* Read version before ram_list.blocks */
+ smp_rmb();
- t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- i = 0;
- while ((ret = qemu_file_rate_limit(f)) == 0 ||
- !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
- int pages;
+ ram_control_before_iterate(f, RAM_CONTROL_ROUND);
- if (qemu_file_get_error(f)) {
- break;
- }
+ t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ i = 0;
+ while ((ret = qemu_file_rate_limit(f)) == 0 ||
+ !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
+ int pages;
- pages = ram_find_and_save_block(rs, false);
- /* no more pages to sent */
- if (pages == 0) {
- done = 1;
- break;
- }
+ if (qemu_file_get_error(f)) {
+ break;
+ }
- if (pages < 0) {
- qemu_file_set_error(f, pages);
- break;
- }
+ pages = ram_find_and_save_block(rs, false);
+ /* no more pages to sent */
+ if (pages == 0) {
+ done = 1;
+ break;
+ }
- rs->target_page_count += pages;
-
- /* we want to check in the 1st loop, just in case it was the 1st time
- and we had to sync the dirty bitmap.
- qemu_clock_get_ns() is a bit expensive, so we only check each some
- iterations
- */
- if ((i & 63) == 0) {
- uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
- if (t1 > MAX_WAIT) {
- trace_ram_save_iterate_big_wait(t1, i);
+ if (pages < 0) {
+ qemu_file_set_error(f, pages);
break;
}
+
+ rs->target_page_count += pages;
+
+ /*
+ * we want to check in the 1st loop, just in case it was the 1st
+ * time and we had to sync the dirty bitmap.
+ * qemu_clock_get_ns() is a bit expensive, so we only check each
+ * some iterations
+ */
+ if ((i & 63) == 0) {
+ uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
+ 1000000;
+ if (t1 > MAX_WAIT) {
+ trace_ram_save_iterate_big_wait(t1, i);
+ break;
+ }
+ }
+ i++;
}
- i++;
}
- rcu_read_unlock();
/*
* Must occur before EOS (or any QEMUFile operation)
@@ -3511,35 +3503,33 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
RAMState *rs = *temp;
int ret = 0;
- rcu_read_lock();
-
- if (!migration_in_postcopy()) {
- migration_bitmap_sync_precopy(rs);
- }
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (!migration_in_postcopy()) {
+ migration_bitmap_sync_precopy(rs);
+ }
- ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+ ram_control_before_iterate(f, RAM_CONTROL_FINISH);
- /* try transferring iterative blocks of memory */
+ /* try transferring iterative blocks of memory */
- /* flush all remaining blocks regardless of rate limiting */
- while (true) {
- int pages;
+ /* flush all remaining blocks regardless of rate limiting */
+ while (true) {
+ int pages;
- pages = ram_find_and_save_block(rs, !migration_in_colo_state());
- /* no more blocks to sent */
- if (pages == 0) {
- break;
- }
- if (pages < 0) {
- ret = pages;
- break;
+ pages = ram_find_and_save_block(rs, !migration_in_colo_state());
+ /* no more blocks to sent */
+ if (pages == 0) {
+ break;
+ }
+ if (pages < 0) {
+ ret = pages;
+ break;
+ }
}
- }
- flush_compressed_data(rs);
- ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-
- rcu_read_unlock();
+ flush_compressed_data(rs);
+ ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+ }
multifd_send_sync_main(rs);
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
@@ -3562,9 +3552,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
if (!migration_in_postcopy() &&
remaining_size < max_size) {
qemu_mutex_lock_iothread();
- rcu_read_lock();
- migration_bitmap_sync_precopy(rs);
- rcu_read_unlock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ migration_bitmap_sync_precopy(rs);
+ }
qemu_mutex_unlock_iothread();
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
}
@@ -3908,7 +3898,13 @@ int colo_init_ram_cache(void)
error_report("%s: Can't alloc memory for COLO cache of block %s,"
"size 0x" RAM_ADDR_FMT, __func__, block->idstr,
block->used_length);
- goto out_locked;
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ if (block->colo_cache) {
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
+ block->colo_cache = NULL;
+ }
+ }
+ return -errno;
}
memcpy(block->colo_cache, block->host, block->used_length);
}
@@ -3934,18 +3930,6 @@ int colo_init_ram_cache(void)
memory_global_dirty_log_start();
return 0;
-
-out_locked:
-
- RAMBLOCK_FOREACH_NOT_IGNORED(block) {
- if (block->colo_cache) {
- qemu_anon_ram_free(block->colo_cache, block->used_length);
- block->colo_cache = NULL;
- }
- }
-
- rcu_read_unlock();
- return -errno;
}
/* It is need to hold the global lock to call this helper */
@@ -3959,16 +3943,14 @@ void colo_release_ram_cache(void)
block->bmap = NULL;
}
- rcu_read_lock();
-
- RAMBLOCK_FOREACH_NOT_IGNORED(block) {
- if (block->colo_cache) {
- qemu_anon_ram_free(block->colo_cache, block->used_length);
- block->colo_cache = NULL;
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ if (block->colo_cache) {
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
+ block->colo_cache = NULL;
+ }
}
}
-
- rcu_read_unlock();
qemu_mutex_destroy(&ram_state->bitmap_mutex);
g_free(ram_state);
ram_state = NULL;
@@ -4048,7 +4030,7 @@ static int ram_load_postcopy(QEMUFile *f)
bool matches_target_page_size = false;
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
- void *postcopy_host_page = postcopy_get_tmp_page(mis);
+ void *postcopy_host_page = mis->postcopy_tmp_page;
void *last_host = NULL;
bool all_zero = false;
@@ -4206,31 +4188,30 @@ static void colo_flush_ram_cache(void)
unsigned long offset = 0;
memory_global_dirty_log_sync();
- rcu_read_lock();
- RAMBLOCK_FOREACH_NOT_IGNORED(block) {
- ramblock_sync_dirty_bitmap(ram_state, block);
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ ramblock_sync_dirty_bitmap(ram_state, block);
+ }
}
- rcu_read_unlock();
trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
- rcu_read_lock();
- block = QLIST_FIRST_RCU(&ram_list.blocks);
+ WITH_RCU_READ_LOCK_GUARD() {
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
- while (block) {
- offset = migration_bitmap_find_dirty(ram_state, block, offset);
+ while (block) {
+ offset = migration_bitmap_find_dirty(ram_state, block, offset);
- if (offset << TARGET_PAGE_BITS >= block->used_length) {
- offset = 0;
- block = QLIST_NEXT_RCU(block, next);
- } else {
- migration_bitmap_clear_dirty(ram_state, block, offset);
- dst_host = block->host + (offset << TARGET_PAGE_BITS);
- src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
- memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+ if (offset << TARGET_PAGE_BITS >= block->used_length) {
+ offset = 0;
+ block = QLIST_NEXT_RCU(block, next);
+ } else {
+ migration_bitmap_clear_dirty(ram_state, block, offset);
+ dst_host = block->host + (offset << TARGET_PAGE_BITS);
+ src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+ }
}
}
-
- rcu_read_unlock();
trace_colo_flush_ram_cache_end();
}
@@ -4429,16 +4410,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
* it will be necessary to reduce the granularity of this
* critical section.
*/
- rcu_read_lock();
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (postcopy_running) {
+ ret = ram_load_postcopy(f);
+ } else {
+ ret = ram_load_precopy(f);
+ }
- if (postcopy_running) {
- ret = ram_load_postcopy(f);
- } else {
- ret = ram_load_precopy(f);
+ ret |= wait_for_decompress_done();
}
-
- ret |= wait_for_decompress_done();
- rcu_read_unlock();
trace_ram_load_complete(ret, seq_iter);
if (!ret && migration_incoming_in_colo_state()) {
diff --git a/migration/rdma.c b/migration/rdma.c
index 4c74e88a37..e241dcb992 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -88,7 +88,6 @@ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL;
" to abort!"); \
rdma->error_reported = 1; \
} \
- rcu_read_unlock(); \
return rdma->error_state; \
} \
} while (0)
@@ -2678,11 +2677,10 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
size_t i;
size_t len = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmaout);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
@@ -2695,7 +2693,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
ret = qemu_rdma_write_flush(f, rdma);
if (ret < 0) {
rdma->error_state = ret;
- rcu_read_unlock();
return ret;
}
@@ -2715,7 +2712,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
if (ret < 0) {
rdma->error_state = ret;
- rcu_read_unlock();
return ret;
}
@@ -2724,7 +2720,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
}
}
- rcu_read_unlock();
return done;
}
@@ -2764,11 +2759,10 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
ssize_t i;
size_t done = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmain);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
@@ -2805,7 +2799,6 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
if (ret < 0) {
rdma->error_state = ret;
- rcu_read_unlock();
return ret;
}
@@ -2819,14 +2812,12 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
/* Still didn't get enough, so lets just return */
if (want) {
if (done == 0) {
- rcu_read_unlock();
return QIO_CHANNEL_ERR_BLOCK;
} else {
break;
}
}
}
- rcu_read_unlock();
return done;
}
@@ -2882,7 +2873,7 @@ qio_channel_rdma_source_prepare(GSource *source,
GIOCondition cond = 0;
*timeout = -1;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (rsource->condition == G_IO_IN) {
rdma = atomic_rcu_read(&rsource->rioc->rdmain);
} else {
@@ -2891,7 +2882,6 @@ qio_channel_rdma_source_prepare(GSource *source,
if (!rdma) {
error_report("RDMAContext is NULL when prepare Gsource");
- rcu_read_unlock();
return FALSE;
}
@@ -2900,7 +2890,6 @@ qio_channel_rdma_source_prepare(GSource *source,
}
cond |= G_IO_OUT;
- rcu_read_unlock();
return cond & rsource->condition;
}
@@ -2911,7 +2900,7 @@ qio_channel_rdma_source_check(GSource *source)
RDMAContext *rdma;
GIOCondition cond = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (rsource->condition == G_IO_IN) {
rdma = atomic_rcu_read(&rsource->rioc->rdmain);
} else {
@@ -2920,7 +2909,6 @@ qio_channel_rdma_source_check(GSource *source)
if (!rdma) {
error_report("RDMAContext is NULL when check Gsource");
- rcu_read_unlock();
return FALSE;
}
@@ -2929,7 +2917,6 @@ qio_channel_rdma_source_check(GSource *source)
}
cond |= G_IO_OUT;
- rcu_read_unlock();
return cond & rsource->condition;
}
@@ -2943,7 +2930,7 @@ qio_channel_rdma_source_dispatch(GSource *source,
RDMAContext *rdma;
GIOCondition cond = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (rsource->condition == G_IO_IN) {
rdma = atomic_rcu_read(&rsource->rioc->rdmain);
} else {
@@ -2952,7 +2939,6 @@ qio_channel_rdma_source_dispatch(GSource *source,
if (!rdma) {
error_report("RDMAContext is NULL when dispatch Gsource");
- rcu_read_unlock();
return FALSE;
}
@@ -2961,7 +2947,6 @@ qio_channel_rdma_source_dispatch(GSource *source,
}
cond |= G_IO_OUT;
- rcu_read_unlock();
return (*func)(QIO_CHANNEL(rsource->rioc),
(cond & rsource->condition),
user_data);
@@ -3073,7 +3058,7 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
RDMAContext *rdmain, *rdmaout;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdmain = atomic_rcu_read(&rioc->rdmain);
rdmaout = atomic_rcu_read(&rioc->rdmain);
@@ -3100,7 +3085,6 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
break;
}
- rcu_read_unlock();
return 0;
}
@@ -3146,18 +3130,16 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
RDMAContext *rdma;
int ret;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmaout);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
CHECK_ERROR_STATE();
if (migration_in_postcopy()) {
- rcu_read_unlock();
return RAM_SAVE_CONTROL_NOT_SUPP;
}
@@ -3242,11 +3224,9 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
}
}
- rcu_read_unlock();
return RAM_SAVE_CONTROL_DELAYED;
err:
rdma->error_state = ret;
- rcu_read_unlock();
return ret;
}
@@ -3470,11 +3450,10 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
int count = 0;
int i = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmain);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
@@ -3717,7 +3696,6 @@ out:
if (ret < 0) {
rdma->error_state = ret;
}
- rcu_read_unlock();
return ret;
}
@@ -3735,11 +3713,10 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
int curr;
int found = -1;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmain);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
@@ -3753,7 +3730,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
if (found == -1) {
error_report("RAMBlock '%s' not found on destination", name);
- rcu_read_unlock();
return -ENOENT;
}
@@ -3761,7 +3737,6 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
trace_rdma_block_notification_handle(name, rdma->next_src_index);
rdma->next_src_index++;
- rcu_read_unlock();
return 0;
}
@@ -3786,17 +3761,15 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
RDMAContext *rdma;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmaout);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
CHECK_ERROR_STATE();
if (migration_in_postcopy()) {
- rcu_read_unlock();
return 0;
}
@@ -3804,7 +3777,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
qemu_fflush(f);
- rcu_read_unlock();
return 0;
}
@@ -3821,17 +3793,15 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
RDMAControlHeader head = { .len = 0, .repeat = 1 };
int ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
rdma = atomic_rcu_read(&rioc->rdmaout);
if (!rdma) {
- rcu_read_unlock();
return -EIO;
}
CHECK_ERROR_STATE();
if (migration_in_postcopy()) {
- rcu_read_unlock();
return 0;
}
@@ -3863,7 +3833,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
qemu_rdma_reg_whole_ram_blocks : NULL);
if (ret < 0) {
ERROR(errp, "receiving remote info!");
- rcu_read_unlock();
return ret;
}
@@ -3887,7 +3856,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
"not identical on both the source and destination.",
local->nb_blocks, nb_dest_blocks);
rdma->error_state = -EINVAL;
- rcu_read_unlock();
return -EINVAL;
}
@@ -3904,7 +3872,6 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
local->block[i].length,
rdma->dest_blocks[i].length);
rdma->error_state = -EINVAL;
- rcu_read_unlock();
return -EINVAL;
}
local->block[i].remote_host_addr =
@@ -3922,11 +3889,9 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
goto err;
}
- rcu_read_unlock();
return 0;
err:
rdma->error_state = ret;
- rcu_read_unlock();
return ret;
}
diff --git a/migration/savevm.c b/migration/savevm.c
index bb9462a54d..8d95e261f6 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1215,6 +1215,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
save_section_footer(f, se);
if (ret < 0) {
+ error_report("failed to save SaveStateEntry with id(name): %d(%s)",
+ se->section_id, se->idstr);
qemu_file_set_error(f, ret);
}
if (ret <= 0) {
@@ -1835,6 +1837,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
rcu_unregister_thread();
mis->have_listen_thread = false;
+ postcopy_state_set(POSTCOPY_INCOMING_END);
+
return NULL;
}
@@ -1865,7 +1869,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* shouldn't be doing anything yet so don't actually expect requests
*/
if (migrate_postcopy_ram()) {
- if (postcopy_ram_enable_notify(mis)) {
+ if (postcopy_ram_incoming_setup(mis)) {
postcopy_ram_incoming_cleanup(mis);
return -1;
}
@@ -1876,11 +1880,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
return -1;
}
- if (mis->have_listen_thread) {
- error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
- return -1;
- }
-
mis->have_listen_thread = true;
/* Start up the listening thread and wait for it to signal ready */
qemu_sem_init(&mis->listen_thread_sem, 0);
@@ -1934,7 +1933,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
/* After all discards we can start running and asking for pages */
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
{
- PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+ PostcopyState ps = postcopy_state_get();
trace_loadvm_postcopy_handle_run();
if (ps != POSTCOPY_INCOMING_LISTENING) {
@@ -1942,6 +1941,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
return -1;
}
+ postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
qemu_bh_schedule(mis->bh);
diff --git a/migration/trace-events b/migration/trace-events
index 858d415d56..6dee7b5389 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -71,6 +71,11 @@ get_qtailq_end(const char *name, const char *reason, int val) "%s %s/%d"
put_qtailq(const char *name, int version_id) "%s v%d"
put_qtailq_end(const char *name, const char *reason) "%s %s"
+get_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
+put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"
+
# qemu-file.c
qemu_file_fclose(void) ""
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index bee658a1b2..7236cf92bc 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -691,3 +691,155 @@ const VMStateInfo vmstate_info_qtailq = {
.get = get_qtailq,
.put = put_qtailq,
};
+
+struct put_gtree_data {
+ QEMUFile *f;
+ const VMStateDescription *key_vmsd;
+ const VMStateDescription *val_vmsd;
+ QJSON *vmdesc;
+ int ret;
+};
+
+static gboolean put_gtree_elem(gpointer key, gpointer value, gpointer data)
+{
+ struct put_gtree_data *capsule = (struct put_gtree_data *)data;
+ QEMUFile *f = capsule->f;
+ int ret;
+
+ qemu_put_byte(f, true);
+
+ /* put the key */
+ if (!capsule->key_vmsd) {
+ qemu_put_be64(f, (uint64_t)(uintptr_t)(key)); /* direct key */
+ } else {
+ ret = vmstate_save_state(f, capsule->key_vmsd, key, capsule->vmdesc);
+ if (ret) {
+ capsule->ret = ret;
+ return true;
+ }
+ }
+
+ /* put the data */
+ ret = vmstate_save_state(f, capsule->val_vmsd, value, capsule->vmdesc);
+ if (ret) {
+ capsule->ret = ret;
+ return true;
+ }
+ return false;
+}
+
+static int put_gtree(QEMUFile *f, void *pv, size_t unused_size,
+ const VMStateField *field, QJSON *vmdesc)
+{
+ bool direct_key = (!field->start);
+ const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+ const VMStateDescription *val_vmsd = &field->vmsd[0];
+ const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+ struct put_gtree_data capsule = {
+ .f = f,
+ .key_vmsd = key_vmsd,
+ .val_vmsd = val_vmsd,
+ .vmdesc = vmdesc,
+ .ret = 0};
+ GTree **pval = pv;
+ GTree *tree = *pval;
+ uint32_t nnodes = g_tree_nnodes(tree);
+ int ret;
+
+ trace_put_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+ qemu_put_be32(f, nnodes);
+ g_tree_foreach(tree, put_gtree_elem, (gpointer)&capsule);
+ qemu_put_byte(f, false);
+ ret = capsule.ret;
+ if (ret) {
+ error_report("%s : failed to save gtree (%d)", field->name, ret);
+ }
+ trace_put_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+ return ret;
+}
+
+static int get_gtree(QEMUFile *f, void *pv, size_t unused_size,
+ const VMStateField *field)
+{
+ bool direct_key = (!field->start);
+ const VMStateDescription *key_vmsd = direct_key ? NULL : &field->vmsd[1];
+ const VMStateDescription *val_vmsd = &field->vmsd[0];
+ const char *key_vmsd_name = direct_key ? "direct" : key_vmsd->name;
+ int version_id = field->version_id;
+ size_t key_size = field->start;
+ size_t val_size = field->size;
+ int nnodes, count = 0;
+ GTree **pval = pv;
+ GTree *tree = *pval;
+ void *key, *val;
+ int ret = 0;
+
+ /* in case of direct key, the key vmsd can be {}, ie. check fields */
+ if (!direct_key && version_id > key_vmsd->version_id) {
+ error_report("%s %s", key_vmsd->name, "too new");
+ return -EINVAL;
+ }
+ if (!direct_key && version_id < key_vmsd->minimum_version_id) {
+ error_report("%s %s", key_vmsd->name, "too old");
+ return -EINVAL;
+ }
+ if (version_id > val_vmsd->version_id) {
+ error_report("%s %s", val_vmsd->name, "too new");
+ return -EINVAL;
+ }
+ if (version_id < val_vmsd->minimum_version_id) {
+ error_report("%s %s", val_vmsd->name, "too old");
+ return -EINVAL;
+ }
+
+ nnodes = qemu_get_be32(f);
+ trace_get_gtree(field->name, key_vmsd_name, val_vmsd->name, nnodes);
+
+ while (qemu_get_byte(f)) {
+ if ((++count) > nnodes) {
+ ret = -EINVAL;
+ break;
+ }
+ if (direct_key) {
+ key = (void *)(uintptr_t)qemu_get_be64(f);
+ } else {
+ key = g_malloc0(key_size);
+ ret = vmstate_load_state(f, key_vmsd, key, version_id);
+ if (ret) {
+ error_report("%s : failed to load %s (%d)",
+ field->name, key_vmsd->name, ret);
+ goto key_error;
+ }
+ }
+ val = g_malloc0(val_size);
+ ret = vmstate_load_state(f, val_vmsd, val, version_id);
+ if (ret) {
+ error_report("%s : failed to load %s (%d)",
+ field->name, val_vmsd->name, ret);
+ goto val_error;
+ }
+ g_tree_insert(tree, key, val);
+ }
+ if (count != nnodes) {
+ error_report("%s inconsistent stream when loading the gtree",
+ field->name);
+ return -EINVAL;
+ }
+ trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+ return ret;
+val_error:
+ g_free(val);
+key_error:
+ if (!direct_key) {
+ g_free(key);
+ }
+ trace_get_gtree_end(field->name, key_vmsd_name, val_vmsd->name, ret);
+ return ret;
+}
+
+
+const VMStateInfo vmstate_info_gtree = {
+ .name = "gtree",
+ .get = get_gtree,
+ .put = put_gtree,
+};
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index e80c4c6143..1e5be1d4ff 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -812,6 +812,423 @@ static void test_load_q(void)
qemu_fclose(fload);
}
+/* interval (key) */
+typedef struct TestGTreeInterval {
+ uint64_t low;
+ uint64_t high;
+} TestGTreeInterval;
+
+#define VMSTATE_INTERVAL \
+{ \
+ .name = "interval", \
+ .version_id = 1, \
+ .minimum_version_id = 1, \
+ .fields = (VMStateField[]) { \
+ VMSTATE_UINT64(low, TestGTreeInterval), \
+ VMSTATE_UINT64(high, TestGTreeInterval), \
+ VMSTATE_END_OF_LIST() \
+ } \
+}
+
+/* mapping (value) */
+typedef struct TestGTreeMapping {
+ uint64_t phys_addr;
+ uint32_t flags;
+} TestGTreeMapping;
+
+#define VMSTATE_MAPPING \
+{ \
+ .name = "mapping", \
+ .version_id = 1, \
+ .minimum_version_id = 1, \
+ .fields = (VMStateField[]) { \
+ VMSTATE_UINT64(phys_addr, TestGTreeMapping), \
+ VMSTATE_UINT32(flags, TestGTreeMapping), \
+ VMSTATE_END_OF_LIST() \
+ }, \
+}
+
+static const VMStateDescription vmstate_interval_mapping[2] = {
+ VMSTATE_MAPPING, /* value */
+ VMSTATE_INTERVAL /* key */
+};
+
+typedef struct TestGTreeDomain {
+ int32_t id;
+ GTree *mappings;
+} TestGTreeDomain;
+
+typedef struct TestGTreeIOMMU {
+ int32_t id;
+ GTree *domains;
+} TestGTreeIOMMU;
+
+/* Interval comparison function */
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+ TestGTreeInterval *inta = (TestGTreeInterval *)a;
+ TestGTreeInterval *intb = (TestGTreeInterval *)b;
+
+ if (inta->high < intb->low) {
+ return -1;
+ } else if (intb->high < inta->low) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* ID comparison function */
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+ uint ua = GPOINTER_TO_UINT(a);
+ uint ub = GPOINTER_TO_UINT(b);
+ return (ua > ub) - (ua < ub);
+}
+
+static void destroy_domain(gpointer data)
+{
+ TestGTreeDomain *domain = (TestGTreeDomain *)data;
+
+ g_tree_destroy(domain->mappings);
+ g_free(domain);
+}
+
+static int domain_preload(void *opaque)
+{
+ TestGTreeDomain *domain = opaque;
+
+ domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+ NULL, g_free, g_free);
+ return 0;
+}
+
+static int iommu_preload(void *opaque)
+{
+ TestGTreeIOMMU *iommu = opaque;
+
+ iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
+ NULL, NULL, destroy_domain);
+ return 0;
+}
+
+static const VMStateDescription vmstate_domain = {
+ .name = "domain",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_load = domain_preload,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT32(id, TestGTreeDomain),
+ VMSTATE_GTREE_V(mappings, TestGTreeDomain, 1,
+ vmstate_interval_mapping,
+ TestGTreeInterval, TestGTreeMapping),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_iommu = {
+ .name = "iommu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_load = iommu_preload,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT32(id, TestGTreeIOMMU),
+ VMSTATE_GTREE_DIRECT_KEY_V(domains, TestGTreeIOMMU, 1,
+ &vmstate_domain, TestGTreeDomain),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+uint8_t first_domain_dump[] = {
+ /* id */
+ 0x00, 0x0, 0x0, 0x6,
+ 0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+ 0x1, /* start of a */
+ /* a */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+ /* map_a */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x1, /* start of b */
+ /* b */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+ /* map_b */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x0, /* end of gtree */
+ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeDomain *create_first_domain(void)
+{
+ TestGTreeDomain *domain;
+ TestGTreeMapping *map_a, *map_b;
+ TestGTreeInterval *a, *b;
+
+ domain = g_malloc0(sizeof(TestGTreeDomain));
+ domain->id = 6;
+
+ a = g_malloc0(sizeof(TestGTreeInterval));
+ a->low = 0x1000;
+ a->high = 0x1FFF;
+
+ b = g_malloc0(sizeof(TestGTreeInterval));
+ b->low = 0x4000;
+ b->high = 0x4FFF;
+
+ map_a = g_malloc0(sizeof(TestGTreeMapping));
+ map_a->phys_addr = 0xa000;
+ map_a->flags = 1;
+
+ map_b = g_malloc0(sizeof(TestGTreeMapping));
+ map_b->phys_addr = 0xe0000;
+ map_b->flags = 2;
+
+ domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, NULL,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+ g_tree_insert(domain->mappings, a, map_a);
+ g_tree_insert(domain->mappings, b, map_b);
+ return domain;
+}
+
+static void test_gtree_save_domain(void)
+{
+ TestGTreeDomain *first_domain = create_first_domain();
+
+ save_vmstate(&vmstate_domain, first_domain);
+ compare_vmstate(first_domain_dump, sizeof(first_domain_dump));
+ destroy_domain(first_domain);
+}
+
+struct match_node_data {
+ GTree *tree;
+ gpointer key;
+ gpointer value;
+};
+
+struct tree_cmp_data {
+ GTree *tree1;
+ GTree *tree2;
+ GTraverseFunc match_node;
+};
+
+static gboolean match_interval_mapping_node(gpointer key,
+ gpointer value, gpointer data)
+{
+ TestGTreeMapping *map_a, *map_b;
+ TestGTreeInterval *a, *b;
+ struct match_node_data *d = (struct match_node_data *)data;
+ char *str = g_strdup_printf("dest");
+
+ g_free(str);
+ a = (TestGTreeInterval *)key;
+ b = (TestGTreeInterval *)d->key;
+
+ map_a = (TestGTreeMapping *)value;
+ map_b = (TestGTreeMapping *)d->value;
+
+ assert(a->low == b->low);
+ assert(a->high == b->high);
+ assert(map_a->phys_addr == map_b->phys_addr);
+ assert(map_a->flags == map_b->flags);
+ g_tree_remove(d->tree, key);
+ return true;
+}
+
+static gboolean diff_tree(gpointer key, gpointer value, gpointer data)
+{
+ struct tree_cmp_data *tp = (struct tree_cmp_data *)data;
+ struct match_node_data d = {tp->tree2, key, value};
+
+ g_tree_foreach(tp->tree2, tp->match_node, &d);
+ g_tree_remove(tp->tree1, key);
+ return false;
+}
+
+static void compare_trees(GTree *tree1, GTree *tree2,
+ GTraverseFunc function)
+{
+ struct tree_cmp_data tp = {tree1, tree2, function};
+
+ g_tree_foreach(tree1, diff_tree, &tp);
+ assert(g_tree_nnodes(tree1) == 0);
+ assert(g_tree_nnodes(tree2) == 0);
+}
+
+static void diff_domain(TestGTreeDomain *d1, TestGTreeDomain *d2)
+{
+ assert(d1->id == d2->id);
+ compare_trees(d1->mappings, d2->mappings, match_interval_mapping_node);
+}
+
+static gboolean match_domain_node(gpointer key, gpointer value, gpointer data)
+{
+ uint64_t id1, id2;
+ TestGTreeDomain *d1, *d2;
+ struct match_node_data *d = (struct match_node_data *)data;
+
+ id1 = (uint64_t)(uintptr_t)key;
+ id2 = (uint64_t)(uintptr_t)d->key;
+ d1 = (TestGTreeDomain *)value;
+ d2 = (TestGTreeDomain *)d->value;
+ assert(id1 == id2);
+ diff_domain(d1, d2);
+ g_tree_remove(d->tree, key);
+ return true;
+}
+
+static void diff_iommu(TestGTreeIOMMU *iommu1, TestGTreeIOMMU *iommu2)
+{
+ assert(iommu1->id == iommu2->id);
+ compare_trees(iommu1->domains, iommu2->domains, match_domain_node);
+}
+
+static void test_gtree_load_domain(void)
+{
+ TestGTreeDomain *dest_domain = g_malloc0(sizeof(TestGTreeDomain));
+ TestGTreeDomain *orig_domain = create_first_domain();
+ QEMUFile *fload, *fsave;
+ char eof;
+
+ fsave = open_test_file(true);
+ qemu_put_buffer(fsave, first_domain_dump, sizeof(first_domain_dump));
+ g_assert(!qemu_file_get_error(fsave));
+ qemu_fclose(fsave);
+
+ fload = open_test_file(false);
+
+ vmstate_load_state(fload, &vmstate_domain, dest_domain, 1);
+ eof = qemu_get_byte(fload);
+ g_assert(!qemu_file_get_error(fload));
+ g_assert_cmpint(orig_domain->id, ==, dest_domain->id);
+ g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+ diff_domain(orig_domain, dest_domain);
+ destroy_domain(orig_domain);
+ destroy_domain(dest_domain);
+ qemu_fclose(fload);
+}
+
+uint8_t iommu_dump[] = {
+ /* iommu id */
+ 0x00, 0x0, 0x0, 0x7,
+ 0x00, 0x0, 0x0, 0x2, /* 2 domains */
+ 0x1,/* start of domain 5 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x5, /* key = 5 */
+ 0x00, 0x0, 0x0, 0x5, /* domain1 id */
+ 0x00, 0x0, 0x0, 0x1, /* 1 mapping */
+ 0x1, /* start of mappings */
+ /* c */
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF,
+ /* map_c */
+ 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00,
+ 0x00, 0x0, 0x0, 0x3,
+ 0x0, /* end of domain1 mappings*/
+ 0x1,/* start of domain 6 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0x0, 0x6, /* key = 6 */
+ 0x00, 0x0, 0x0, 0x6, /* domain6 id */
+ 0x00, 0x0, 0x0, 0x2, /* 2 mappings */
+ 0x1, /* start of a */
+ /* a */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0xFF,
+ /* map_a */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x1, /* start of b */
+ /* b */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0xFF,
+ /* map_b */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x0, /* end of domain6 mappings*/
+ 0x0, /* end of domains */
+ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestGTreeIOMMU *create_iommu(void)
+{
+ TestGTreeIOMMU *iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+ TestGTreeDomain *first_domain = create_first_domain();
+ TestGTreeDomain *second_domain;
+ TestGTreeMapping *map_c;
+ TestGTreeInterval *c;
+
+ iommu->id = 7;
+ iommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp, NULL,
+ NULL,
+ destroy_domain);
+
+ second_domain = g_malloc0(sizeof(TestGTreeDomain));
+ second_domain->id = 5;
+ second_domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+ NULL,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+
+ g_tree_insert(iommu->domains, GUINT_TO_POINTER(6), first_domain);
+ g_tree_insert(iommu->domains, (gpointer)0x0000000000000005, second_domain);
+
+ c = g_malloc0(sizeof(TestGTreeInterval));
+ c->low = 0x1000000;
+ c->high = 0x1FFFFFF;
+
+ map_c = g_malloc0(sizeof(TestGTreeMapping));
+ map_c->phys_addr = 0xF000000;
+ map_c->flags = 0x3;
+
+ g_tree_insert(second_domain->mappings, c, map_c);
+ return iommu;
+}
+
+static void destroy_iommu(TestGTreeIOMMU *iommu)
+{
+ g_tree_destroy(iommu->domains);
+ g_free(iommu);
+}
+
+static void test_gtree_save_iommu(void)
+{
+ TestGTreeIOMMU *iommu = create_iommu();
+
+ save_vmstate(&vmstate_iommu, iommu);
+ compare_vmstate(iommu_dump, sizeof(iommu_dump));
+ destroy_iommu(iommu);
+}
+
+static void test_gtree_load_iommu(void)
+{
+ TestGTreeIOMMU *dest_iommu = g_malloc0(sizeof(TestGTreeIOMMU));
+ TestGTreeIOMMU *orig_iommu = create_iommu();
+ QEMUFile *fsave, *fload;
+ char eof;
+ int ret;
+
+ fsave = open_test_file(true);
+ qemu_put_buffer(fsave, iommu_dump, sizeof(iommu_dump));
+ g_assert(!qemu_file_get_error(fsave));
+ qemu_fclose(fsave);
+
+ fload = open_test_file(false);
+ vmstate_load_state(fload, &vmstate_iommu, dest_iommu, 1);
+ ret = qemu_file_get_error(fload);
+ eof = qemu_get_byte(fload);
+ ret = qemu_file_get_error(fload);
+ g_assert(!ret);
+ g_assert_cmpint(orig_iommu->id, ==, dest_iommu->id);
+ g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+
+ diff_iommu(orig_iommu, dest_iommu);
+ destroy_iommu(orig_iommu);
+ destroy_iommu(dest_iommu);
+ qemu_fclose(fload);
+}
+
typedef struct TmpTestStruct {
TestStruct *parent;
int64_t diff;
@@ -932,6 +1349,10 @@ int main(int argc, char **argv)
test_arr_ptr_prim_0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
+ g_test_add_func("/vmstate/gtree/save/savedomain", test_gtree_save_domain);
+ g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain);
+ g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu);
+ g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
g_test_run();