aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2015-01-16 10:16:14 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2015-01-16 10:16:14 +0000
commit e68cba36360a2ab5bf0576b66df4d0eb0d822f8d (patch)
tree 933ecff5cc8e380fe38693dc0cca2d7b8e479dfc
parent df58887b20fab8fe8a6dcca4db30cd4e4077d53a (diff)
parent ea987c2c21d4326bb58ee28f6888fdcf8fbda067 (diff)
Merge remote-tracking branch 'remotes/amit-migration/tags/mig-2.3-1' into staging
A set of patches collected over the holidays. Mix of optimizations and fixes.

# gpg: Signature made Fri 16 Jan 2015 07:42:00 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg: aka "Amit Shah <amit@kernel.org>"
# gpg: aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-migration/tags/mig-2.3-1:
  vmstate: type-check sub-arrays
  migration_cancel: shutdown migration socket
  Handle bi-directional communication for fd migration
  socket shutdown
  Tests: QEMUSizedBuffer/QEMUBuffer
  QEMUSizedBuffer: only free qsb that qemu_bufopen allocated
  xbzrle: rebuild the cache_is_cached function
  xbzrle: optimize XBZRLE to decrease the cache misses

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- arch_init.c 8
-rw-r--r-- docs/xbzrle.txt 8
-rw-r--r-- include/migration/page_cache.h 10
-rw-r--r-- include/migration/qemu-file.h 10
-rw-r--r-- include/migration/vmstate.h 2
-rw-r--r-- include/qemu/sockets.h 7
-rw-r--r-- migration/fd.c 24
-rw-r--r-- migration/migration.c 12
-rw-r--r-- migration/qemu-file-buf.c 10
-rw-r--r-- migration/qemu-file-unix.c 23
-rw-r--r-- migration/qemu-file.c 12
-rw-r--r-- page_cache.c 43
-rw-r--r-- tests/test-vmstate.c 20
13 files changed, 143 insertions, 46 deletions
diff --git a/arch_init.c b/arch_init.c
index cfedbf08af..89c8fa46bb 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -346,7 +346,8 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr)
/* We don't care if this fails to allocate a new cache page
* as long as it updated an old one */
- cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+ cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+ bitmap_sync_count);
}
#define ENCODING_FLAG_XBZRLE 0x1
@@ -358,10 +359,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
int encoded_len = 0, bytes_sent = -1;
uint8_t *prev_cached_page;
- if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+ if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
acct_info.xbzrle_cache_miss++;
if (!last_stage) {
- if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) {
+ if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+ bitmap_sync_count) == -1) {
return -1;
} else {
/* update *current_data when the page has been
diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt
index cc3a26a91d..52c8511a4c 100644
--- a/docs/xbzrle.txt
+++ b/docs/xbzrle.txt
@@ -71,6 +71,14 @@ encoded buffer:
encoded length 24
e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69
+Cache update strategy
+=====================
+Keeping the hot pages in the cache is effective for decreased cache
+misses. XBZRLE uses a counter as the age of each page. The counter will
+increase after each ram dirty bitmap sync. When a cache conflict is
+detected, XBZRLE will only evict pages in the cache that are older than
+a threshold.
+
Usage
======================
1. Verify the destination QEMU version is able to decode the new format.
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h
index 2d5ce2dd7a..10ed53274c 100644
--- a/include/migration/page_cache.h
+++ b/include/migration/page_cache.h
@@ -43,8 +43,10 @@ void cache_fini(PageCache *cache);
*
* @cache pointer to the PageCache struct
* @addr: page addr
+ * @current_age: current bitmap generation
*/
-bool cache_is_cached(const PageCache *cache, uint64_t addr);
+bool cache_is_cached(const PageCache *cache, uint64_t addr,
+ uint64_t current_age);
/**
* get_cached_data: Get the data cached for an addr
@@ -60,13 +62,15 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
* cache_insert: insert the page into the cache. the page cache
* will dup the data on insert. the previous value will be overwritten
*
- * Returns -1 on error
+ * Returns -1 when the page isn't inserted into cache
*
* @cache pointer to the PageCache struct
* @addr: page address
* @pdata: pointer to the page
+ * @current_age: current bitmap generation
*/
-int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
+ uint64_t current_age);
/**
* cache_resize: resize the page cache. In case of size reduction the extra
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 401676bf4d..d843c0010c 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -84,6 +84,14 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
size_t size,
int *bytes_sent);
+/*
+ * Stop any read or write (depending on flags) on the underlying
+ * transport on the QEMUFile.
+ * Existing blocking reads/writes must be woken
+ * Returns 0 on success, -err on error
+ */
+typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr);
+
typedef struct QEMUFileOps {
QEMUFilePutBufferFunc *put_buffer;
QEMUFileGetBufferFunc *get_buffer;
@@ -94,6 +102,7 @@ typedef struct QEMUFileOps {
QEMURamHookFunc *after_ram_iterate;
QEMURamHookFunc *hook_ram_load;
QEMURamSaveFunc *save_page;
+ QEMUFileShutdownFunc *shut_down;
} QEMUFileOps;
struct QEMUSizedBuffer {
@@ -177,6 +186,7 @@ void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
int64_t qemu_file_get_rate_limit(QEMUFile *f);
int qemu_file_get_error(QEMUFile *f);
void qemu_file_set_error(QEMUFile *f, int ret);
+int qemu_file_shutdown(QEMUFile *f);
void qemu_fflush(QEMUFile *f);
static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index e45fc49cb1..d712a651ca 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -189,7 +189,7 @@ extern const VMStateInfo vmstate_info_bitmap;
type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
#define vmstate_offset_sub_array(_state, _field, _type, _start) \
- (offsetof(_state, _field[_start]))
+ vmstate_offset_value(_state, _field[_start], _type)
#define vmstate_offset_buffer(_state, _field) \
vmstate_offset_array(_state, _field, uint8_t, \
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index f47dae614a..7992ece72a 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -44,6 +44,13 @@ int socket_set_fast_reuse(int fd);
int send_all(int fd, const void *buf, int len1);
int recv_all(int fd, void *buf, int len1, bool single_read);
+#ifdef WIN32
+/* Windows has different names for the same constants with the same values */
+#define SHUT_RD 0
+#define SHUT_WR 1
+#define SHUT_RDWR 2
+#endif
+
/* callback function for nonblocking connect
* valid fd on success, negative error code on failure
*/
diff --git a/migration/fd.c b/migration/fd.c
index d2e523af74..129da9910b 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -31,13 +31,29 @@
do { } while (0)
#endif
+static bool fd_is_socket(int fd)
+{
+ struct stat stat;
+ int ret = fstat(fd, &stat);
+ if (ret == -1) {
+ /* When in doubt say no */
+ return false;
+ }
+ return S_ISSOCK(stat.st_mode);
+}
+
void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp)
{
int fd = monitor_get_fd(cur_mon, fdname, errp);
if (fd == -1) {
return;
}
- s->file = qemu_fdopen(fd, "wb");
+
+ if (fd_is_socket(fd)) {
+ s->file = qemu_fopen_socket(fd, "wb");
+ } else {
+ s->file = qemu_fdopen(fd, "wb");
+ }
migrate_fd_connect(s);
}
@@ -58,7 +74,11 @@ void fd_start_incoming_migration(const char *infd, Error **errp)
DPRINTF("Attempting to start an incoming migration via fd\n");
fd = strtol(infd, NULL, 0);
- f = qemu_fdopen(fd, "rb");
+ if (fd_is_socket(fd)) {
+ f = qemu_fopen_socket(fd, "rb");
+ } else {
+ f = qemu_fdopen(fd, "rb");
+ }
if(f == NULL) {
error_setg_errno(errp, errno, "failed to open the source descriptor");
return;
diff --git a/migration/migration.c b/migration/migration.c
index c49a05a165..b3adbc653a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -330,6 +330,7 @@ void migrate_fd_error(MigrationState *s)
static void migrate_fd_cancel(MigrationState *s)
{
int old_state ;
+ QEMUFile *f = migrate_get_current()->file;
trace_migrate_fd_cancel();
do {
@@ -339,6 +340,17 @@ static void migrate_fd_cancel(MigrationState *s)
}
migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
} while (s->state != MIG_STATE_CANCELLING);
+
+ /*
+ * If we're unlucky the migration code might be stuck somewhere in a
+ * send/write while the network has failed and is waiting to timeout;
+ * if we've got shutdown(2) available then we can force it to quit.
+ * The outgoing qemu file gets closed in migrate_fd_cleanup that is
+ * called in a bh, so there is no race against this cancel.
+ */
+ if (s->state == MIG_STATE_CANCELLING && f) {
+ qemu_file_shutdown(f);
+ }
}
void add_migration_state_change_notifier(Notifier *notify)
diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c
index d33dd44747..e97e0bd655 100644
--- a/migration/qemu-file-buf.c
+++ b/migration/qemu-file-buf.c
@@ -395,6 +395,7 @@ QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb)
typedef struct QEMUBuffer {
QEMUSizedBuffer *qsb;
QEMUFile *file;
+ bool qsb_allocated;
} QEMUBuffer;
static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
@@ -424,7 +425,9 @@ static int buf_close(void *opaque)
{
QEMUBuffer *s = opaque;
- qsb_free(s->qsb);
+ if (s->qsb_allocated) {
+ qsb_free(s->qsb);
+ }
g_free(s);
@@ -463,12 +466,11 @@ QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
}
s = g_malloc0(sizeof(QEMUBuffer));
- if (mode[0] == 'r') {
- s->qsb = input;
- }
+ s->qsb = input;
if (s->qsb == NULL) {
s->qsb = qsb_create(NULL, 0);
+ s->qsb_allocated = true;
}
if (!s->qsb) {
g_free(s);
diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
index 9682396d97..bfbc0861ab 100644
--- a/migration/qemu-file-unix.c
+++ b/migration/qemu-file-unix.c
@@ -26,6 +26,7 @@
#include "qemu/sockets.h"
#include "block/coroutine.h"
#include "migration/qemu-file.h"
+#include "migration/qemu-file-internal.h"
typedef struct QEMUFileSocket {
int fd;
@@ -84,6 +85,17 @@ static int socket_close(void *opaque)
return 0;
}
+static int socket_shutdown(void *opaque, bool rd, bool wr)
+{
+ QEMUFileSocket *s = opaque;
+
+ if (shutdown(s->fd, rd ? (wr ? SHUT_RDWR : SHUT_RD) : SHUT_WR)) {
+ return -errno;
+ } else {
+ return 0;
+ }
+}
+
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
int64_t pos)
{
@@ -192,15 +204,18 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
}
static const QEMUFileOps socket_read_ops = {
- .get_fd = socket_get_fd,
+ .get_fd = socket_get_fd,
.get_buffer = socket_get_buffer,
- .close = socket_close
+ .close = socket_close,
+ .shut_down = socket_shutdown
+
};
static const QEMUFileOps socket_write_ops = {
- .get_fd = socket_get_fd,
+ .get_fd = socket_get_fd,
.writev_buffer = socket_writev_buffer,
- .close = socket_close
+ .close = socket_close,
+ .shut_down = socket_shutdown
};
QEMUFile *qemu_fopen_socket(int fd, const char *mode)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index a7f2a34430..edc283073a 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,6 +30,18 @@
#include "migration/qemu-file-internal.h"
#include "trace.h"
+/*
+ * Stop a file from being read/written - not all backing files can do this
+ * typically only sockets can.
+ */
+int qemu_file_shutdown(QEMUFile *f)
+{
+ if (!f->ops->shut_down) {
+ return -ENOSYS;
+ }
+ return f->ops->shut_down(f->opaque, true, true);
+}
+
bool qemu_file_mode_is_not_valid(const char *mode)
{
if (mode == NULL ||
diff --git a/page_cache.c b/page_cache.c
index 89bb1ec3a0..cf8878d1d7 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -33,6 +33,9 @@
do { } while (0)
#endif
+/* the page in cache will not be replaced in two cycles */
+#define CACHED_PAGE_LIFETIME 2
+
typedef struct CacheItem CacheItem;
struct CacheItem {
@@ -122,18 +125,6 @@ static size_t cache_get_cache_pos(const PageCache *cache,
return pos;
}
-bool cache_is_cached(const PageCache *cache, uint64_t addr)
-{
- size_t pos;
-
- g_assert(cache);
- g_assert(cache->page_cache);
-
- pos = cache_get_cache_pos(cache, addr);
-
- return (cache->page_cache[pos].it_addr == addr);
-}
-
static CacheItem *cache_get_by_addr(const PageCache *cache, uint64_t addr)
{
size_t pos;
@@ -151,17 +142,35 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
return cache_get_by_addr(cache, addr)->it_data;
}
-int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
+bool cache_is_cached(const PageCache *cache, uint64_t addr,
+ uint64_t current_age)
{
+ CacheItem *it;
- CacheItem *it = NULL;
+ it = cache_get_by_addr(cache, addr);
- g_assert(cache);
- g_assert(cache->page_cache);
+ if (it->it_addr == addr) {
+ /* update the it_age when the cache hit */
+ it->it_age = current_age;
+ return true;
+ }
+ return false;
+}
+
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
+ uint64_t current_age)
+{
+
+ CacheItem *it;
/* actual update of entry */
it = cache_get_by_addr(cache, addr);
+ if (it->it_data && it->it_addr != addr &&
+ it->it_age + CACHED_PAGE_LIFETIME > current_age) {
+ /* the cache page is fresh, don't replace it */
+ return -1;
+ }
/* allocate page */
if (!it->it_data) {
it->it_data = g_try_malloc(cache->page_size);
@@ -174,7 +183,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
memcpy(it->it_data, pdata, cache->page_size);
- it->it_age = ++cache->max_item_age;
+ it->it_age = current_age;
it->it_addr = addr;
return 0;
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 5e0fd13cc4..39b7b01734 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -60,16 +60,6 @@ static QEMUFile *open_test_file(bool write)
return qemu_fdopen(fd, write ? "wb" : "rb");
}
-/* Open a read-only qemu-file from an existing memory block */
-static QEMUFile *open_mem_file_read(const void *data, size_t len)
-{
- /* The qsb gets freed by qemu_fclose */
- QEMUSizedBuffer *qsb = qsb_create(data, len);
- g_assert(qsb);
-
- return qemu_bufopen("r", qsb);
-}
-
/*
* Check that the contents of the memory-buffered file f match
* the given size/data.
@@ -450,7 +440,9 @@ static void test_load_noskip(void)
QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
};
- QEMUFile *loading = open_mem_file_read(buf, sizeof(buf));
+ QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
+ g_assert(qsb);
+ QEMUFile *loading = qemu_bufopen("r", qsb);
TestStruct obj = { .skip_c_e = false };
vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
g_assert(!qemu_file_get_error(loading));
@@ -461,6 +453,7 @@ static void test_load_noskip(void)
g_assert_cmpint(obj.e, ==, 50);
g_assert_cmpint(obj.f, ==, 60);
qemu_fclose(loading);
+ qsb_free(qsb);
}
static void test_load_skip(void)
@@ -473,7 +466,9 @@ static void test_load_skip(void)
QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
};
- QEMUFile *loading = open_mem_file_read(buf, sizeof(buf));
+ QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
+ g_assert(qsb);
+ QEMUFile *loading = qemu_bufopen("r", qsb);
TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 };
vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
g_assert(!qemu_file_get_error(loading));
@@ -484,6 +479,7 @@ static void test_load_skip(void)
g_assert_cmpint(obj.e, ==, 500);
g_assert_cmpint(obj.f, ==, 60);
qemu_fclose(loading);
+ qsb_free(qsb);
}
int main(int argc, char **argv)