/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The p2m table is logically a flat array, but we implement it as a
 * three-level tree to allow the address space to be sparse.
 *
 *                            Xen
 *                             |
 *      p2m_top                          p2m_top_mfn
 *        /   \                            /      \
 *   p2m_mid  p2m_mid            p2m_mid_mfn    p2m_mid_mfn
 *    /  \     /  \                  /              /
 *  p2m  p2m p2m  p2m            p2m  p2m  p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
 * maximum representable pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/cache.h>
#include <asm/setup.h>

#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>

#include <xen/page.h>

#include "xen-ops.h"

unsigned long xen_max_p2m_pfn __read_mostly;

#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_P2M_PFN);
	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_PER_PAGE;
}

static void p2m_top_init(unsigned long ***top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing;
}

static void p2m_top_mfn_init(unsigned long *top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_init(unsigned long **mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = p2m_missing;
}

static void p2m_mid_mfn_init(unsigned long *mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = virt_to_mfn(p2m_missing);
}

static void p2m_init(unsigned long *p2m)
{
	unsigned i;

	/* A leaf page holds P2M_PER_PAGE mfn entries, not mid-level pointers. */
	for (i = 0; i < P2M_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
}
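/*
 * A worked example of the decomposition done by the index helpers above,
 * assuming a 64-bit build with 4 KiB pages (so P2M_PER_PAGE,
 * P2M_MID_PER_PAGE and P2M_TOP_PER_PAGE are all 512); the concrete pfn is
 * chosen purely for illustration:
 *
 *	pfn = 0x1234567 (19088743)
 *	p2m_top_index(pfn) = 19088743 / (512 * 512) = 72
 *	p2m_mid_index(pfn) = (19088743 / 512) % 512 = 418
 *	p2m_index(pfn)     = 19088743 % 512         = 359
 *
 * so the mfn for that pfn lives at p2m_top[72][418][359], and
 * 72 * 512 * 512 + 418 * 512 + 359 == 19088743 recovers the pfn.
 */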
/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called very early, and must use extend_brk()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void xen_build_mfn_list_list(void)
{
	unsigned long pfn;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(p2m_mid_missing_mfn);

		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise: the mfns all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn);
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);
		unsigned long **mid;
		unsigned long *mid_mfn_p;

		mid = p2m_top[topidx];
		mid_mfn_p = p2m_top_mfn_p[topidx];

		/*
		 * Don't bother allocating any mfn mid levels if they're
		 * just missing; just update the stored mfn, since all of
		 * them could have changed over a migrate.
		 */
		if (mid == p2m_mid_missing) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
			/*
			 * XXX boot-time only!  We should never find
			 * missing parts of the mfn tree after
			 * runtime.  extend_brk() will BUG if we call
			 * it too late.
			 */
			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_mfn_init(mid_mfn_p);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
	}
}

void xen_setup_mfn_list_list(void)
{
	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(p2m_top_mfn);
	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
	unsigned long pfn;

	xen_max_p2m_pfn = max_pfn;

	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_init(p2m_missing);

	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_mid_init(p2m_mid_missing);

	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_top_init(p2m_top);

	/*
	 * The domain builder gives us a pre-constructed p2m array in
	 * mfn_list for all the pages initially given to us, so we just
	 * need to graft that into our tree structure.
	 */
	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);

		if (p2m_top[topidx] == p2m_mid_missing) {
			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_init(mid);

			p2m_top[topidx] = mid;
		}

		p2m_top[topidx][mididx] = &mfn_list[pfn];
	}
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN))
		return INVALID_P2M_ENTRY;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);

static void *alloc_p2m_page(void)
{
	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void free_p2m_page(void *p)
{
	free_page((unsigned long)p);
}
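/*
 * Note on the "missing" placeholder pages: p2m_missing is a real page
 * filled with INVALID_P2M_ENTRY, and p2m_mid_missing is a real page whose
 * entries all point at p2m_missing, so a lookup of an unpopulated pfn
 * needs no special-casing.  get_phys_to_machine() above simply walks
 * top -> mid -> leaf and ends up reading INVALID_P2M_ENTRY:
 *
 *	p2m_top[topidx]          == p2m_mid_missing
 *	p2m_mid_missing[mididx]  == p2m_missing
 *	p2m_missing[idx]         == INVALID_P2M_ENTRY
 *
 * alloc_p2m() below uses the same placeholders as its cmpxchg comparands
 * when it replaces a hole with real pages.
 */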
/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
	unsigned topidx, mididx;
	unsigned long ***top_p, **mid;
	unsigned long *top_mfn_p, *mid_mfn;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);

	top_p = &p2m_top[topidx];
	mid = *top_p;

	if (mid == p2m_mid_missing) {
		/* Mid level is missing, allocate a new one */
		mid = alloc_p2m_page();
		if (!mid)
			return false;

		p2m_mid_init(mid);

		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
			free_p2m_page(mid);
	}

	top_mfn_p = &p2m_top_mfn[topidx];
	mid_mfn = p2m_top_mfn_p[topidx];

	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

	if (mid_mfn == p2m_mid_missing_mfn) {
		/* Separately check the mid mfn level */
		unsigned long missing_mfn;
		unsigned long mid_mfn_mfn;

		mid_mfn = alloc_p2m_page();
		if (!mid_mfn)
			return false;

		p2m_mid_mfn_init(mid_mfn);

		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
		mid_mfn_mfn = virt_to_mfn(mid_mfn);
		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
			free_p2m_page(mid_mfn);
		else
			p2m_top_mfn_p[topidx] = mid_mfn;
	}

	if (p2m_top[topidx][mididx] == p2m_missing) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

		p2m = alloc_p2m_page();
		if (!p2m)
			return false;

		p2m_init(p2m);

		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
			free_p2m_page(p2m);
		else
			mid_mfn[mididx] = virt_to_mfn(p2m);
	}

	return true;
}

/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
	}

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	if (p2m_top[topidx][mididx] == p2m_missing)
		return mfn == INVALID_P2M_ENTRY;

	p2m_top[topidx][mididx][idx] = mfn;

	return true;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return true;
	}

	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
		if (!alloc_p2m(pfn))
			return false;

		if (!__set_phys_to_machine(pfn, mfn))
			return false;
	}

	return true;
}
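/*
 * Usage sketch (illustration only, not a function in this file): a caller
 * such as a ballooning or grant-mapping path that has just been given
 * machine frame 'mfn' to back pseudo-physical frame 'pfn' records the
 * mapping roughly like this; the surrounding driver logic and the error
 * code are hypothetical:
 *
 *	if (!set_phys_to_machine(pfn, mfn))
 *		return -ENOMEM;		// alloc_p2m() could not allocate a leaf
 *
 * When the page is later handed back to the hypervisor, the entry is
 * turned back into a hole with:
 *
 *	set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 *
 * which always succeeds, since a missing leaf already reads as invalid.
 */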