From 0dc3f44aca18b1be8b425f3f4feb4b3e8d68de2e Mon Sep 17 00:00:00 2001 From: Mike Day Date: Thu, 5 Sep 2013 14:41:35 -0400 Subject: Convert ram_list to RCU Allow "unlocked" reads of the ram_list by using an RCU-enabled QLIST. The ramlist mutex is kept. call_rcu callbacks are run with the iothread lock taken, but that may change in the future. Writers still take the ramlist mutex, but they no longer need to assume that the iothread lock is taken. Readers of the list, instead, no longer require either the iothread or ramlist mutex, but they need to use rcu_read_lock() and rcu_read_unlock(). One place in arch_init.c was downgrading from write side to read side like this: qemu_mutex_lock_iothread() qemu_mutex_lock_ramlist() ... qemu_mutex_unlock_iothread() ... qemu_mutex_unlock_ramlist() and the equivalent idiom is: qemu_mutex_lock_ramlist() rcu_read_lock() ... qemu_mutex_unlock_ramlist() ... rcu_read_unlock() Reviewed-by: Fam Zheng Signed-off-by: Mike Day Signed-off-by: Paolo Bonzini --- arch_init.c | 65 ++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 18 deletions(-) (limited to 'arch_init.c') diff --git a/arch_init.c b/arch_init.c index 1ee2e35352..5fc6fc382c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -52,6 +52,7 @@ #include "exec/ram_addr.h" #include "hw/acpi/acpi.h" #include "qemu/host-utils.h" +#include "qemu/rcu_queue.h" #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) 
\ @@ -523,9 +524,12 @@ static void migration_bitmap_sync(void) trace_migration_bitmap_sync_start(); address_space_sync_dirty_bitmap(&address_space_memory); - QLIST_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } + rcu_read_unlock(); + trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; @@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, /* * ram_find_and_save_block: Finds a page to send and sends it to f * + * Called within an RCU critical section. + * * Returns: The number of bytes written. * 0 means no dirty pages */ @@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) MemoryRegion *mr; if (!block) - block = QLIST_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); while (true) { mr = block->mr; @@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) } if (offset >= block->used_length) { offset = 0; - block = QLIST_NEXT(block, next); + block = QLIST_NEXT_RCU(block, next); if (!block) { - block = QLIST_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); complete_round = true; ram_bulk_stage = false; } @@ -728,10 +734,10 @@ uint64_t ram_bytes_total(void) RAMBlock *block; uint64_t total = 0; - QLIST_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) total += block->used_length; - } - + rcu_read_unlock(); return total; } @@ -777,6 +783,13 @@ static void reset_ram_globals(void) #define MAX_WAIT 50 /* ms, half buffered_file limit */ + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * a long-running RCU critical section. 
When RCU reclaims in the code + * start to become numerous, it will be necessary to reduce the + * granularity of these critical sections. + */ + static int ram_save_setup(QEMUFile *f, void *opaque) { RAMBlock *block; @@ -820,6 +833,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) /* iothread lock needed for ram_list.dirty_memory[] */ qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); + rcu_read_lock(); bytes_transferred = 0; reset_ram_globals(); @@ -832,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) * gaps due to alignment or unplugs. */ migration_dirty_pages = 0; - QLIST_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { uint64_t block_pages; block_pages = block->used_length >> TARGET_PAGE_BITS; @@ -841,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque) memory_global_dirty_log_start(); migration_bitmap_sync(); + qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - QLIST_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); } - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -868,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) int64_t t0; int total_sent = 0; - qemu_mutex_lock_ramlist(); - + rcu_read_lock(); if (ram_list.version != last_version) { reset_ram_globals(); } + /* Read version before ram_list.blocks */ + smp_rmb(); + ram_control_before_iterate(f, RAM_CONTROL_ROUND); t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -904,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); /* * Must occur before EOS (or any QEMUFile 
operation) @@ -933,7 +949,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) /* Called with iothread lock */ static int ram_save_complete(QEMUFile *f, void *opaque) { - qemu_mutex_lock_ramlist(); + rcu_read_lock(); + migration_bitmap_sync(); ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -955,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_FINISH); migration_end(); - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); return 0; @@ -969,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) if (remaining_size < max_size) { qemu_mutex_lock_iothread(); + rcu_read_lock(); migration_bitmap_sync(); + rcu_read_unlock(); qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } @@ -1011,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } +/* Must be called from within an RCU critical section. + * Returns a pointer from within the RCU-protected ram_list. + */ static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1032,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, qemu_get_buffer(f, (uint8_t *)id, len); id[len] = 0; - QLIST_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id)) && block->max_length > offset) { return memory_region_get_ram_ptr(block->mr) + offset; @@ -1065,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; } + /* This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. 
+ */ + rcu_read_lock(); while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; void *host; @@ -1089,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) id[len] = 0; length = qemu_get_be64(f); - QLIST_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { if (length != block->used_length) { Error *local_err = NULL; @@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } } + rcu_read_unlock(); DPRINTF("Completed load of VM with exit code %d seq iteration " "%" PRIu64 "\n", ret, seq_iter); return ret; -- cgit v1.2.3