aboutsummaryrefslogtreecommitdiff
path: root/xen-mapcache.c
diff options
context:
space:
mode:
authorJun Nakajima <jun.nakajima@intel.com>2010-08-31 16:41:25 +0100
committerAlexander Graf <agraf@suse.de>2011-05-08 10:10:01 +0200
commit432d268c0552fd30c8be564f7ea2504a2b546101 (patch)
treed74d75e9b70fd2ced0daf705062260dab19b40c4 /xen-mapcache.c
parent9c11a8ac886ccbb5f8e1b08e8ae12f045d783031 (diff)
xen: Introduce the Xen mapcache
On IA32 host or IA32 PAE host, at present, generally, we can't create an HVM guest with more than 2G memory, because generally it's almost impossible for Qemu to find a large enough and consecutive virtual address space to map an HVM guest's whole physical address space. The attached patch fixes this issue using dynamic mapping based on little blocks of memory. Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the lock option, so mapcache will not unmap these ram_ptr. Blocks that do not belong to the RAM, but usually to a device ROM or to a framebuffer, are handled in a separate function. So the whole RAMBlock can be map. Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'xen-mapcache.c')
-rw-r--r--xen-mapcache.c349
1 files changed, 349 insertions, 0 deletions
diff --git a/xen-mapcache.c b/xen-mapcache.c
new file mode 100644
index 0000000000..a539358fb7
--- /dev/null
+++ b/xen-mapcache.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (C) 2011 Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "config.h"
+
+#include <sys/resource.h>
+
+#include "hw/xen_backend.h"
+#include "blockdev.h"
+
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#include "xen-mapcache.h"
+#include "trace.h"
+
+
+//#define MAPCACHE_DEBUG
+
+#ifdef MAPCACHE_DEBUG
+# define DPRINTF(fmt, ...) do { \
+ fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+#else
+# define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#if defined(__i386__)
+# define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+# define MCACHE_BUCKET_SHIFT 20
+#endif
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef struct MapCacheEntry {
+ target_phys_addr_t paddr_index;
+ uint8_t *vaddr_base;
+ DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+ uint8_t lock;
+ struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+ uint8_t *vaddr_req;
+ target_phys_addr_t paddr_index;
+ QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+ MapCacheEntry *entry;
+ unsigned long nr_buckets;
+ QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+ /* For most cases (>99.9%), the page address is the same. */
+ target_phys_addr_t last_address_index;
+ uint8_t *last_address_vaddr;
+ unsigned long max_mcache_size;
+ unsigned int mcache_bucket_shift;
+} MapCache;
+
+static MapCache *mapcache;
+
+static inline int test_bit(unsigned int bit, const unsigned long *map)
+{
+ return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)));
+}
+
+void qemu_map_cache_init(void)
+{
+ unsigned long size;
+ struct rlimit rlimit_as;
+
+ mapcache = qemu_mallocz(sizeof (MapCache));
+
+ QTAILQ_INIT(&mapcache->locked_entries);
+ mapcache->last_address_index = -1;
+
+ getrlimit(RLIMIT_AS, &rlimit_as);
+ rlimit_as.rlim_cur = rlimit_as.rlim_max;
+ setrlimit(RLIMIT_AS, &rlimit_as);
+ mapcache->max_mcache_size = rlimit_as.rlim_max;
+
+ mapcache->nr_buckets =
+ (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
+ (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+ (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+ size = mapcache->nr_buckets * sizeof (MapCacheEntry);
+ size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+ DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size);
+ mapcache->entry = qemu_mallocz(size);
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+ target_phys_addr_t size,
+ target_phys_addr_t address_index)
+{
+ uint8_t *vaddr_base;
+ xen_pfn_t *pfns;
+ int *err;
+ unsigned int i, j;
+ target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+ trace_qemu_remap_bucket(address_index);
+
+ pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+ err = qemu_mallocz(nb_pfn * sizeof (int));
+
+ if (entry->vaddr_base != NULL) {
+ if (munmap(entry->vaddr_base, size) != 0) {
+ perror("unmap fails");
+ exit(-1);
+ }
+ }
+
+ for (i = 0; i < nb_pfn; i++) {
+ pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+ }
+
+ vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+ pfns, err, nb_pfn);
+ if (vaddr_base == NULL) {
+ perror("xc_map_foreign_bulk");
+ exit(-1);
+ }
+
+ entry->vaddr_base = vaddr_base;
+ entry->paddr_index = address_index;
+
+ for (i = 0; i < nb_pfn; i += BITS_PER_LONG) {
+ unsigned long word = 0;
+ if ((i + BITS_PER_LONG) > nb_pfn) {
+ j = nb_pfn % BITS_PER_LONG;
+ } else {
+ j = BITS_PER_LONG;
+ }
+ while (j > 0) {
+ word = (word << 1) | !err[i + --j];
+ }
+ entry->valid_mapping[i / BITS_PER_LONG] = word;
+ }
+
+ qemu_free(pfns);
+ qemu_free(err);
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+ MapCacheEntry *entry, *pentry = NULL;
+ target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT;
+ target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+
+ trace_qemu_map_cache(phys_addr);
+
+ if (address_index == mapcache->last_address_index && !lock) {
+ trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset);
+ return mapcache->last_address_vaddr + address_offset;
+ }
+
+ entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+ while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+ pentry = entry;
+ entry = entry->next;
+ }
+ if (!entry) {
+ entry = qemu_mallocz(sizeof (MapCacheEntry));
+ pentry->next = entry;
+ qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+ } else if (!entry->lock) {
+ if (!entry->vaddr_base || entry->paddr_index != address_index ||
+ !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+ qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+ }
+ }
+
+ if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+ mapcache->last_address_index = -1;
+ trace_qemu_map_cache_return(NULL);
+ return NULL;
+ }
+
+ mapcache->last_address_index = address_index;
+ mapcache->last_address_vaddr = entry->vaddr_base;
+ if (lock) {
+ MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+ entry->lock++;
+ reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+ reventry->paddr_index = mapcache->last_address_index;
+ QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
+ }
+
+ trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset);
+ return mapcache->last_address_vaddr + address_offset;
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+ MapCacheRev *reventry;
+ target_phys_addr_t paddr_index;
+ int found = 0;
+
+ QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+ if (reventry->vaddr_req == ptr) {
+ paddr_index = reventry->paddr_index;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr);
+ QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+ DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index,
+ reventry->vaddr_req);
+ }
+ abort();
+ return 0;
+ }
+
+ return paddr_index << MCACHE_BUCKET_SHIFT;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+ MapCacheEntry *entry = NULL, *pentry = NULL;
+ MapCacheRev *reventry;
+ target_phys_addr_t paddr_index;
+ int found = 0;
+
+ if (mapcache->last_address_vaddr == buffer) {
+ mapcache->last_address_index = -1;
+ }
+
+ QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+ if (reventry->vaddr_req == buffer) {
+ paddr_index = reventry->paddr_index;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+ QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+ DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+ }
+ return;
+ }
+ QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+ qemu_free(reventry);
+
+ entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+ while (entry && entry->paddr_index != paddr_index) {
+ pentry = entry;
+ entry = entry->next;
+ }
+ if (!entry) {
+ DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
+ return;
+ }
+ entry->lock--;
+ if (entry->lock > 0 || pentry == NULL) {
+ return;
+ }
+
+ pentry->next = entry->next;
+ if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+ perror("unmap fails");
+ exit(-1);
+ }
+ qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+ unsigned long i;
+ MapCacheRev *reventry;
+
+ /* Flush pending AIO before destroying the mapcache */
+ qemu_aio_flush();
+
+ QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+ DPRINTF("There should be no locked mappings at this time, "
+ "but "TARGET_FMT_plx" -> %p is present\n",
+ reventry->paddr_index, reventry->vaddr_req);
+ }
+
+ mapcache_lock();
+
+ for (i = 0; i < mapcache->nr_buckets; i++) {
+ MapCacheEntry *entry = &mapcache->entry[i];
+
+ if (entry->vaddr_base == NULL) {
+ continue;
+ }
+
+ if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+ perror("unmap fails");
+ exit(-1);
+ }
+
+ entry->paddr_index = 0;
+ entry->vaddr_base = NULL;
+ }
+
+ mapcache->last_address_index = -1;
+ mapcache->last_address_vaddr = NULL;
+
+ mapcache_unlock();
+}
+
+uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size)
+{
+ uint8_t *vaddr_base;
+ xen_pfn_t *pfns;
+ int *err;
+ unsigned int i;
+ target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+ trace_xen_map_block(phys_addr, size);
+ phys_addr >>= XC_PAGE_SHIFT;
+
+ pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+ err = qemu_mallocz(nb_pfn * sizeof (int));
+
+ for (i = 0; i < nb_pfn; i++) {
+ pfns[i] = phys_addr + i;
+ }
+
+ vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+ pfns, err, nb_pfn);
+ if (vaddr_base == NULL) {
+ perror("xc_map_foreign_bulk");
+ exit(-1);
+ }
+
+ qemu_free(pfns);
+ qemu_free(err);
+
+ return vaddr_base;
+}