aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 16:02:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 16:02:08 -0700
commitb5f4035adfffbcc6b478de5b8c44b618b3124aff (patch)
treee7a5f011d8aaf5c95edf933f98f25dfc8fa46837
parentce004178be1bbaa292e9e6497939e2970300095a (diff)
parent68c2c39a76b094e9b2773e5846424ea674bf2c46 (diff)
Merge tag 'stable/for-linus-3.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
Pull Xen updates from Konrad Rzeszutek Wilk: "Features: * Extend the APIC ops implementation and add IRQ_WORKER vector support so that 'perf' can work properly. * Fix self-ballooning code, and balloon logic when booting as initial domain. * Move array printing code to generic debugfs * Support XenBus domains. * Lazily free grants when a domain is dead/non-existent. * In M2P code use batching calls Bug-fixes: * Fix NULL dereference in allocation failure path (hvc_xen) * Fix unbinding of IRQ_WORKER vector during vCPU hot-unplug * Fix HVM guest resume - we would leak an PIRQ value instead of reusing the existing one." Fix up add-add onflicts in arch/x86/xen/enlighten.c due to addition of apic ipi interface next to the new apic_id functions. * tag 'stable/for-linus-3.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: xen: do not map the same GSI twice in PVHVM guests. hvc_xen: NULL dereference on allocation failure xen: Add selfballoning memory reservation tunable. xenbus: Add support for xenbus backend in stub domain xen/smp: unbind irqworkX when unplugging vCPUs. xen: enter/exit lazy_mmu_mode around m2p_override calls xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep xen: implement IRQ_WORK_VECTOR handler xen: implement apic ipi interface xen/setup: update VA mapping when releasing memory during setup xen/setup: Combine the two hypercall functions - since they are quite similar. xen/setup: Populate freed MFNs from non-RAM E820 entries and gaps to E820 RAM xen/setup: Only print "Freeing XXX-YYY pfn range: Z pages freed" if Z > 0 xen/gnttab: add deferred freeing logic debugfs: Add support to print u32 array in debugfs xen/p2m: An early bootup variant of set_phys_to_machine xen/p2m: Collapse early_alloc_p2m_middle redundant checks. xen/p2m: Allow alloc_p2m_middle to call reserve_brk depending on argument xen/p2m: Move code around to allow for better re-usage.
-rw-r--r--arch/x86/include/asm/xen/events.h1
-rw-r--r--arch/x86/include/asm/xen/page.h1
-rw-r--r--arch/x86/pci/xen.c4
-rw-r--r--arch/x86/xen/debugfs.c104
-rw-r--r--arch/x86/xen/debugfs.h4
-rw-r--r--arch/x86/xen/enlighten.c13
-rw-r--r--arch/x86/xen/mmu.c23
-rw-r--r--arch/x86/xen/p2m.c104
-rw-r--r--arch/x86/xen/setup.c171
-rw-r--r--arch/x86/xen/smp.c112
-rw-r--r--arch/x86/xen/smp.h12
-rw-r--r--arch/x86/xen/spinlock.c12
-rw-r--r--arch/x86/xen/xen-ops.h1
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/acpi.c62
-rw-r--r--drivers/xen/events.c5
-rw-r--r--drivers/xen/grant-table.c125
-rw-r--r--drivers/xen/xen-selfballoon.c34
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c6
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h1
-rw-r--r--drivers/xen/xenbus/xenbus_dev_backend.c51
-rw-r--r--fs/debugfs/file.c128
-rw-r--r--include/linux/debugfs.h11
-rw-r--r--include/xen/acpi.h58
-rw-r--r--include/xen/events.h3
-rw-r--r--include/xen/grant_table.h2
-rw-r--r--include/xen/xenbus_dev.h3
27 files changed, 827 insertions, 226 deletions
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 1df35417c412..cc146d51449e 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -6,6 +6,7 @@ enum ipi_vector {
XEN_CALL_FUNCTION_VECTOR,
XEN_CALL_FUNCTION_SINGLE_VECTOR,
XEN_SPIN_UNLOCK_VECTOR,
+ XEN_IRQ_WORK_VECTOR,
XEN_NR_IPIS,
};
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c34f96c2f7a0..93971e841dd5 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -44,6 +44,7 @@ extern unsigned long machine_to_phys_nr;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 7415aa927913..56ab74989cf1 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -64,6 +64,10 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
int shareable = 0;
char *name;
+ irq = xen_irq_from_gsi(gsi);
+ if (irq > 0)
+ return irq;
+
if (set_pirq)
pirq = gsi;
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index ef1db1900d86..c8377fb26cdf 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -19,107 +19,3 @@ struct dentry * __init xen_init_debugfs(void)
return d_xen_debug;
}
-struct array_data
-{
- void *array;
- unsigned elements;
-};
-
-static int u32_array_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return nonseekable_open(inode, file);
-}
-
-static size_t format_array(char *buf, size_t bufsize, const char *fmt,
- u32 *array, unsigned array_size)
-{
- size_t ret = 0;
- unsigned i;
-
- for(i = 0; i < array_size; i++) {
- size_t len;
-
- len = snprintf(buf, bufsize, fmt, array[i]);
- len++; /* ' ' or '\n' */
- ret += len;
-
- if (buf) {
- buf += len;
- bufsize -= len;
- buf[-1] = (i == array_size-1) ? '\n' : ' ';
- }
- }
-
- ret++; /* \0 */
- if (buf)
- *buf = '\0';
-
- return ret;
-}
-
-static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
-{
- size_t len = format_array(NULL, 0, fmt, array, array_size);
- char *ret;
-
- ret = kmalloc(len, GFP_KERNEL);
- if (ret == NULL)
- return NULL;
-
- format_array(ret, len, fmt, array, array_size);
- return ret;
-}
-
-static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
- loff_t *ppos)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct array_data *data = inode->i_private;
- size_t size;
-
- if (*ppos == 0) {
- if (file->private_data) {
- kfree(file->private_data);
- file->private_data = NULL;
- }
-
- file->private_data = format_array_alloc("%u", data->array, data->elements);
- }
-
- size = 0;
- if (file->private_data)
- size = strlen(file->private_data);
-
- return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
-}
-
-static int xen_array_release(struct inode *inode, struct file *file)
-{
- kfree(file->private_data);
-
- return 0;
-}
-
-static const struct file_operations u32_array_fops = {
- .owner = THIS_MODULE,
- .open = u32_array_open,
- .release= xen_array_release,
- .read = u32_array_read,
- .llseek = no_llseek,
-};
-
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
- struct dentry *parent,
- u32 *array, unsigned elements)
-{
- struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-
- if (data == NULL)
- return NULL;
-
- data->array = array;
- data->elements = elements;
-
- return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
-}
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
index 78d25499be5b..12ebf3325c7b 100644
--- a/arch/x86/xen/debugfs.h
+++ b/arch/x86/xen/debugfs.h
@@ -3,8 +3,4 @@
struct dentry * __init xen_init_debugfs(void);
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
- struct dentry *parent,
- u32 *array, unsigned elements);
-
#endif /* _XEN_DEBUGFS_H */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0f5facdb10c..75f33b2a5933 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
#include <xen/page.h>
#include <xen/hvm.h>
#include <xen/hvc-console.h>
+#include <xen/acpi.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
@@ -75,6 +76,7 @@
#include "xen-ops.h"
#include "mmu.h"
+#include "smp.h"
#include "multicalls.h"
EXPORT_SYMBOL_GPL(hypercall_page);
@@ -883,6 +885,14 @@ static void set_xen_basic_apic_ops(void)
apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
apic->set_apic_id = xen_set_apic_id;
apic->get_apic_id = xen_get_apic_id;
+
+#ifdef CONFIG_SMP
+ apic->send_IPI_allbutself = xen_send_IPI_allbutself;
+ apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself;
+ apic->send_IPI_mask = xen_send_IPI_mask;
+ apic->send_IPI_all = xen_send_IPI_all;
+ apic->send_IPI_self = xen_send_IPI_self;
+#endif
}
#endif
@@ -1340,7 +1350,6 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
- xen_ident_map_ISA();
/* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list();
@@ -1400,6 +1409,8 @@ asmlinkage void __init xen_start_kernel(void)
/* Make sure ACS will be enabled */
pci_request_acs();
+
+ xen_acpi_sleep_register();
}
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3506cd4f9a43..3a73785631ce 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1933,29 +1933,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
-void __init xen_ident_map_ISA(void)
-{
- unsigned long pa;
-
- /*
- * If we're dom0, then linear map the ISA machine addresses into
- * the kernel's address space.
- */
- if (!xen_initial_domain())
- return;
-
- xen_raw_printk("Xen: setup ISA identity maps\n");
-
- for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
- pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
-
- if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
- BUG();
- }
-
- xen_flush_tlb();
-}
-
static void __init xen_post_allocator_init(void)
{
pv_mmu_ops.set_pte = xen_set_pte;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 1b267e75158d..ffd08c414e91 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -499,16 +499,18 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
-static bool __init __early_alloc_p2m(unsigned long pfn)
+static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
+ unsigned long *p2m;
+ unsigned long *mid_mfn_p;
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
/* Pfff.. No boundary cross-over, lets get out. */
- if (!idx)
+ if (!idx && check_boundary)
return false;
WARN(p2m_top[topidx][mididx] == p2m_identity,
@@ -522,24 +524,66 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
return false;
/* Boundary cross-over for the edges: */
- if (idx) {
- unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
- unsigned long *mid_mfn_p;
+ p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_init(p2m);
+ p2m_init(p2m);
- p2m_top[topidx][mididx] = p2m;
+ p2m_top[topidx][mididx] = p2m;
- /* For save/restore we need to MFN of the P2M saved */
-
- mid_mfn_p = p2m_top_mfn_p[topidx];
- WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
- "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
- topidx, mididx);
- mid_mfn_p[mididx] = virt_to_mfn(p2m);
+ /* For save/restore we need to MFN of the P2M saved */
+
+ mid_mfn_p = p2m_top_mfn_p[topidx];
+ WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+ "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+ topidx, mididx);
+ mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+ return true;
+}
+
+static bool __init early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned long *mid_mfn_p;
+ unsigned long **mid;
+
+ mid = p2m_top[topidx];
+ mid_mfn_p = p2m_top_mfn_p[topidx];
+ if (mid == p2m_mid_missing) {
+ mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
}
- return idx != 0;
+ /* And the save/restore P2M tables.. */
+ if (mid_mfn_p == p2m_mid_missing_mfn) {
+ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_mfn_init(mid_mfn_p);
+
+ p2m_top_mfn_p[topidx] = mid_mfn_p;
+ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+ /* Note: we don't set mid_mfn_p[midix] here,
+ * look in early_alloc_p2m_middle */
+ }
+ return true;
+}
+bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+ if (!early_alloc_p2m(pfn))
+ return false;
+
+ if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
+ return false;
+
+ if (!__set_phys_to_machine(pfn, mfn))
+ return false;
+ }
+
+ return true;
}
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
@@ -559,35 +603,11 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
{
- unsigned topidx = p2m_top_index(pfn);
- unsigned long *mid_mfn_p;
- unsigned long **mid;
-
- mid = p2m_top[topidx];
- mid_mfn_p = p2m_top_mfn_p[topidx];
- if (mid == p2m_mid_missing) {
- mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
- p2m_mid_init(mid);
-
- p2m_top[topidx] = mid;
-
- BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
- }
- /* And the save/restore P2M tables.. */
- if (mid_mfn_p == p2m_mid_missing_mfn) {
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
-
- p2m_top_mfn_p[topidx] = mid_mfn_p;
- p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
- /* Note: we don't set mid_mfn_p[midix] here,
- * look in __early_alloc_p2m */
- }
+ WARN_ON(!early_alloc_p2m(pfn));
}
- __early_alloc_p2m(pfn_s);
- __early_alloc_p2m(pfn_e);
+ early_alloc_p2m_middle(pfn_s, true);
+ early_alloc_p2m_middle(pfn_e, true);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1ba8dff26753..3ebba0753d38 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -26,7 +26,6 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
-
#include "xen-ops.h"
#include "vdso.h"
@@ -84,8 +83,8 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
+static unsigned long __init xen_do_chunk(unsigned long start,
+ unsigned long end, bool release)
{
struct xen_memory_reservation reservation = {
.address_bits = 0,
@@ -96,30 +95,138 @@ static unsigned long __init xen_release_chunk(unsigned long start,
unsigned long pfn;
int ret;
- for(pfn = start; pfn < end; pfn++) {
+ for (pfn = start; pfn < end; pfn++) {
+ unsigned long frame;
unsigned long mfn = pfn_to_mfn(pfn);
- /* Make sure pfn exists to start with */
- if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
- continue;
-
- set_xen_guest_handle(reservation.extent_start, &mfn);
+ if (release) {
+ /* Make sure pfn exists to start with */
+ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+ continue;
+ frame = mfn;
+ } else {
+ if (mfn != INVALID_P2M_ENTRY)
+ continue;
+ frame = pfn;
+ }
+ set_xen_guest_handle(reservation.extent_start, &frame);
reservation.nr_extents = 1;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
&reservation);
- WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
+ WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
+ release ? "release" : "populate", pfn, ret);
+
if (ret == 1) {
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
+ if (release)
+ break;
+ set_xen_guest_handle(reservation.extent_start, &frame);
+ reservation.nr_extents = 1;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ break;
+ }
len++;
- }
+ } else
+ break;
}
- printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
- start, end, len);
+ if (len)
+ printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
+ release ? "Freeing" : "Populating",
+ start, end, len,
+ release ? "freed" : "added");
return len;
}
+static unsigned long __init xen_release_chunk(unsigned long start,
+ unsigned long end)
+{
+ return xen_do_chunk(start, end, true);
+}
+
+static unsigned long __init xen_populate_chunk(
+ const struct e820entry *list, size_t map_size,
+ unsigned long max_pfn, unsigned long *last_pfn,
+ unsigned long credits_left)
+{
+ const struct e820entry *entry;
+ unsigned int i;
+ unsigned long done = 0;
+ unsigned long dest_pfn;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ unsigned long credits = credits_left;
+ unsigned long s_pfn;
+ unsigned long e_pfn;
+ unsigned long pfns;
+ long capacity;
+
+ if (credits <= 0)
+ break;
+
+ if (entry->type != E820_RAM)
+ continue;
+
+ e_pfn = PFN_UP(entry->addr + entry->size);
+
+ /* We only care about E820 after the xen_start_info->nr_pages */
+ if (e_pfn <= max_pfn)
+ continue;
+
+ s_pfn = PFN_DOWN(entry->addr);
+ /* If the E820 falls within the nr_pages, we want to start
+ * at the nr_pages PFN.
+ * If that would mean going past the E820 entry, skip it
+ */
+ if (s_pfn <= max_pfn) {
+ capacity = e_pfn - max_pfn;
+ dest_pfn = max_pfn;
+ } else {
+ /* last_pfn MUST be within E820_RAM regions */
+ if (*last_pfn && e_pfn >= *last_pfn)
+ s_pfn = *last_pfn;
+ capacity = e_pfn - s_pfn;
+ dest_pfn = s_pfn;
+ }
+ /* If we had filled this E820_RAM entry, go to the next one. */
+ if (capacity <= 0)
+ continue;
+
+ if (credits > capacity)
+ credits = capacity;
+
+ pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
+ done += pfns;
+ credits_left -= pfns;
+ *last_pfn = (dest_pfn + pfns);
+ }
+ return done;
+}
+
+static void __init xen_set_identity_and_release_chunk(
+ unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
+ unsigned long *released, unsigned long *identity)
+{
+ unsigned long pfn;
+
+ /*
+ * If the PFNs are currently mapped, the VA mapping also needs
+ * to be updated to be 1:1.
+ */
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+ if (start_pfn < nr_pages)
+ *released += xen_release_chunk(
+ start_pfn, min(end_pfn, nr_pages));
+
+ *identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -142,7 +249,6 @@ static unsigned long __init xen_set_identity_and_release(
*/
for (i = 0, entry = list; i < map_size; i++, entry++) {
phys_addr_t end = entry->addr + entry->size;
-
if (entry->type == E820_RAM || i == map_size - 1) {
unsigned long start_pfn = PFN_DOWN(start);
unsigned long end_pfn = PFN_UP(end);
@@ -150,20 +256,19 @@ static unsigned long __init xen_set_identity_and_release(
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);
- if (start_pfn < end_pfn) {
- if (start_pfn < nr_pages)
- released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
+ if (start_pfn < end_pfn)
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
- identity += set_phys_range_identity(
- start_pfn, end_pfn);
- }
start = end;
}
}
- printk(KERN_INFO "Released %lu pages of unused memory\n", released);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+ if (released)
+ printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+ if (identity)
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
return released;
}
@@ -217,7 +322,9 @@ char * __init xen_memory_setup(void)
int rc;
struct xen_memory_map memmap;
unsigned long max_pages;
+ unsigned long last_pfn = 0;
unsigned long extra_pages = 0;
+ unsigned long populated;
int i;
int op;
@@ -257,9 +364,20 @@ char * __init xen_memory_setup(void)
*/
xen_released_pages = xen_set_identity_and_release(
map, memmap.nr_entries, max_pfn);
- extra_pages += xen_released_pages;
/*
+ * Populate back the non-RAM pages and E820 gaps that had been
+ * released. */
+ populated = xen_populate_chunk(map, memmap.nr_entries,
+ max_pfn, &last_pfn, xen_released_pages);
+
+ extra_pages += (xen_released_pages - populated);
+
+ if (last_pfn > max_pfn) {
+ max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
+ mem_end = PFN_PHYS(max_pfn);
+ }
+ /*
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO
* factor the base size. On non-highmem systems, the base
* size is the full initial memory allocation; on highmem it
@@ -272,7 +390,6 @@ char * __init xen_memory_setup(void)
*/
extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
extra_pages);
-
i = 0;
while (i < memmap.nr_entries) {
u64 addr = map[i].addr;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3700945ed0d5..afb250d22a6b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -16,6 +16,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
+#include <linux/irq_work.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
@@ -41,10 +42,12 @@ cpumask_var_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, xen_resched_irq);
static DEFINE_PER_CPU(int, xen_callfunc_irq);
static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+static DEFINE_PER_CPU(int, xen_irq_work);
static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
+static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
/*
* Reschedule call back.
@@ -143,6 +146,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
+ cpu,
+ xen_irq_work_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_irq_work, cpu) = rc;
+
return 0;
fail:
@@ -155,6 +169,8 @@ static int xen_smp_intr_init(unsigned int cpu)
if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
NULL);
+ if (per_cpu(xen_irq_work, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
return rc;
}
@@ -407,6 +423,7 @@ static void xen_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
@@ -469,8 +486,8 @@ static void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-static void xen_send_IPI_mask(const struct cpumask *mask,
- enum ipi_vector vector)
+static void __xen_send_IPI_mask(const struct cpumask *mask,
+ int vector)
{
unsigned cpu;
@@ -482,7 +499,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
int cpu;
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+ __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
/* Make sure other vcpus get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
@@ -495,10 +512,86 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
static void xen_smp_send_call_function_single_ipi(int cpu)
{
- xen_send_IPI_mask(cpumask_of(cpu),
+ __xen_send_IPI_mask(cpumask_of(cpu),
XEN_CALL_FUNCTION_SINGLE_VECTOR);
}
+static inline int xen_map_vector(int vector)
+{
+ int xen_vector;
+
+ switch (vector) {
+ case RESCHEDULE_VECTOR:
+ xen_vector = XEN_RESCHEDULE_VECTOR;
+ break;
+ case CALL_FUNCTION_VECTOR:
+ xen_vector = XEN_CALL_FUNCTION_VECTOR;
+ break;
+ case CALL_FUNCTION_SINGLE_VECTOR:
+ xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
+ break;
+ case IRQ_WORK_VECTOR:
+ xen_vector = XEN_IRQ_WORK_VECTOR;
+ break;
+ default:
+ xen_vector = -1;
+ printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
+ vector);
+ }
+
+ return xen_vector;
+}
+
+void xen_send_IPI_mask(const struct cpumask *mask,
+ int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ __xen_send_IPI_mask(mask, xen_vector);
+}
+
+void xen_send_IPI_all(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ __xen_send_IPI_mask(cpu_online_mask, xen_vector);
+}
+
+void xen_send_IPI_self(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ xen_send_IPI_one(smp_processor_id(), xen_vector);
+}
+
+void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ if (!(num_online_cpus() > 1))
+ return;
+
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ if (this_cpu == cpu)
+ continue;
+
+ xen_smp_send_call_function_single_ipi(cpu);
+ }
+}
+
+void xen_send_IPI_allbutself(int vector)
+{
+ int xen_vector = xen_map_vector(vector);
+
+ if (xen_vector >= 0)
+ xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
@@ -519,6 +612,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
+{
+ irq_enter();
+ irq_work_run();
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_exit();
+
+ return IRQ_HANDLED;
+}
+
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -565,6 +668,7 @@ static void xen_hvm_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
native_cpu_die(cpu);
}
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
new file mode 100644
index 000000000000..8981a76d081a
--- /dev/null
+++ b/arch/x86/xen/smp.h
@@ -0,0 +1,12 @@
+#ifndef _XEN_SMP_H
+
+extern void xen_send_IPI_mask(const struct cpumask *mask,
+ int vector);
+extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector);
+extern void xen_send_IPI_allbutself(int vector);
+extern void physflat_send_IPI_allbutself(int vector);
+extern void xen_send_IPI_all(int vector);
+extern void xen_send_IPI_self(int vector);
+
+#endif
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d69cc6c3f808..83e866d714ce 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -440,12 +440,12 @@ static int __init xen_spinlock_debugfs(void)
debugfs_create_u64("time_total", 0444, d_spin_debug,
&spinlock_stats.time_total);
- xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
- spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
- spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
- spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
+ debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
return 0;
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 45c0c0667bd9..202d4c150154 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -28,7 +28,6 @@ void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_ident_map_ISA(void);
void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 9adc5be57b13..fc3488631136 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
-obj-$(CONFIG_XEN_DOM0) += pci.o
+obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
new file mode 100644
index 000000000000..119d42a2bf57
--- /dev/null
+++ b/drivers/xen/acpi.c
@@ -0,0 +1,62 @@
+/******************************************************************************
+ * acpi.c
+ * acpi file for domain 0 kernel
+ *
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ * Copyright (c) 2011 Yu Ke ke.yu@intel.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xen/acpi.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnt)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_enter_acpi_sleep,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u = {
+ .enter_acpi_sleep = {
+ .pm1a_cnt_val = (u16)pm1a_cnt,
+ .pm1b_cnt_val = (u16)pm1b_cnt,
+ .sleep_state = sleep_state,
+ },
+ },
+ };
+
+ if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
+ WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
+ "Email xen-devel@lists.xensource.com Thank you.\n", \
+ pm1a_cnt, pm1b_cnt);
+ return -1;
+ }
+
+ HYPERVISOR_dom0_op(&op);
+ return 1;
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0a8a17cd80be..6908e4ce2a0d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -611,7 +611,7 @@ static void disable_pirq(struct irq_data *data)
disable_dynirq(data);
}
-static int find_irq_by_gsi(unsigned gsi)
+int xen_irq_from_gsi(unsigned gsi)
{
struct irq_info *info;
@@ -625,6 +625,7 @@ static int find_irq_by_gsi(unsigned gsi)
return -1;
}
+EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
/*
* Do not make any assumptions regarding the relationship between the
@@ -644,7 +645,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
mutex_lock(&irq_mapping_update_lock);
- irq = find_irq_by_gsi(gsi);
+ irq = xen_irq_from_gsi(gsi);
if (irq != -1) {
printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
irq, gsi);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f100ce20b16b..0bfc1ef11259 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -38,6 +38,7 @@
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/hardirq.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
@@ -426,10 +427,8 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
nflags = *pflags;
do {
flags = nflags;
- if (flags & (GTF_reading|GTF_writing)) {
- printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+ if (flags & (GTF_reading|GTF_writing))
return 0;
- }
} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
return 1;
@@ -458,12 +457,103 @@ static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
return 1;
}
-int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
return gnttab_interface->end_foreign_access_ref(ref, readonly);
}
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+ if (_gnttab_end_foreign_access_ref(ref, readonly))
+ return 1;
+ pr_warn("WARNING: g.e. %#x still in use!\n", ref);
+ return 0;
+}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+struct deferred_entry {
+ struct list_head list;
+ grant_ref_t ref;
+ bool ro;
+ uint16_t warn_delay;
+ struct page *page;
+};
+static LIST_HEAD(deferred_list);
+static void gnttab_handle_deferred(unsigned long);
+static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
+
+static void gnttab_handle_deferred(unsigned long unused)
+{
+ unsigned int nr = 10;
+ struct deferred_entry *first = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ while (nr--) {
+ struct deferred_entry *entry
+ = list_first_entry(&deferred_list,
+ struct deferred_entry, list);
+
+ if (entry == first)
+ break;
+ list_del(&entry->list);
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
+ put_free_entry(entry->ref);
+ if (entry->page) {
+ pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+ entry->ref, page_to_pfn(entry->page));
+ __free_page(entry->page);
+ } else
+ pr_info("freeing g.e. %#x\n", entry->ref);
+ kfree(entry);
+ entry = NULL;
+ } else {
+ if (!--entry->warn_delay)
+ pr_info("g.e. %#x still pending\n",
+ entry->ref);
+ if (!first)
+ first = entry;
+ }
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (entry)
+ list_add_tail(&entry->list, &deferred_list);
+ else if (list_empty(&deferred_list))
+ break;
+ }
+ if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
+ deferred_timer.expires = jiffies + HZ;
+ add_timer(&deferred_timer);
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
+ struct page *page)
+{
+ struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ const char *what = KERN_WARNING "leaking";
+
+ if (entry) {
+ unsigned long flags;
+
+ entry->ref = ref;
+ entry->ro = readonly;
+ entry->page = page;
+ entry->warn_delay = 60;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ list_add_tail(&entry->list, &deferred_list);
+ if (!timer_pending(&deferred_timer)) {
+ deferred_timer.expires = jiffies + HZ;
+ add_timer(&deferred_timer);
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ what = KERN_DEBUG "deferring";
+ }
+ printk("%s g.e. %#x (pfn %#lx)\n",
+ what, ref, page ? page_to_pfn(page) : -1);
+}
+
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page)
{
@@ -471,12 +561,9 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
put_free_entry(ref);
if (page != 0)
free_page(page);
- } else {
- /* XXX This needs to be fixed so that the ref and page are
- placed on a list to be freed up later. */
- printk(KERN_WARNING
- "WARNING: leaking g.e. and page still in use!\n");
- }
+ } else
+ gnttab_add_deferred(ref, readonly,
+ page ? virt_to_page(page) : NULL);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
@@ -741,6 +828,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct page **pages, unsigned int count)
{
int i, ret;
+ bool lazy = false;
pte_t *pte;
unsigned long mfn;
@@ -751,6 +839,11 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
if (xen_feature(XENFEAT_auto_translated_physmap))
return ret;
+ if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
for (i = 0; i < count; i++) {
/* Do not add to override if the map failed. */
if (map_ops[i].status)
@@ -769,6 +862,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
return ret;
}
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+
return ret;
}
EXPORT_SYMBOL_GPL(gnttab_map_refs);
@@ -777,6 +873,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct page **pages, unsigned int count, bool clear_pte)
{
int i, ret;
+ bool lazy = false;
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
if (ret)
@@ -785,12 +882,20 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
if (xen_feature(XENFEAT_auto_translated_physmap))
return ret;
+ if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
for (i = 0; i < count; i++) {
ret = m2p_remove_override(pages[i], clear_pte);
if (ret)
return ret;
}
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+
return ret;
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 146c94897016..7d041cb6da26 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -105,6 +105,12 @@ static unsigned int selfballoon_interval __read_mostly = 5;
*/
static unsigned int selfballoon_min_usable_mb;
+/*
+ * Amount of RAM in MB to add to the target number of pages.
+ * Can be used to reserve some more room for caches and the like.
+ */
+static unsigned int selfballoon_reserved_mb;
+
static void selfballoon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
@@ -217,7 +223,8 @@ static void selfballoon_process(struct work_struct *work)
cur_pages = totalram_pages;
tgt_pages = cur_pages; /* default is no change */
goal_pages = percpu_counter_read_positive(&vm_committed_as) +
- totalreserve_pages;
+ totalreserve_pages +
+ MB2PAGES(selfballoon_reserved_mb);
#ifdef CONFIG_FRONTSWAP
/* allow space for frontswap pages to be repatriated */
if (frontswap_selfshrinking && frontswap_enabled)
@@ -397,6 +404,30 @@ static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
show_selfballoon_min_usable_mb,
store_selfballoon_min_usable_mb);
+SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n",
+ selfballoon_reserved_mb);
+
+static ssize_t store_selfballoon_reserved_mb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ err = strict_strtoul(buf, 10, &val);
+ if (err || val == 0)
+ return -EINVAL;
+ selfballoon_reserved_mb = val;
+ return count;
+}
+
+static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR,
+ show_selfballoon_reserved_mb,
+ store_selfballoon_reserved_mb);
+
#ifdef CONFIG_FRONTSWAP
SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
@@ -480,6 +511,7 @@ static struct attribute *selfballoon_attrs[] = {
&dev_attr_selfballoon_downhysteresis.attr,
&dev_attr_selfballoon_uphysteresis.attr,
&dev_attr_selfballoon_min_usable_mb.attr,
+ &dev_attr_selfballoon_reserved_mb.attr,
#ifdef CONFIG_FRONTSWAP
&dev_attr_frontswap_selfshrinking.attr,
&dev_attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 2eff7a6aaa20..52fe7ad07666 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -234,3 +234,9 @@ int xb_init_comms(void)
return 0;
}
+
+void xb_deinit_comms(void)
+{
+ unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+ xenbus_irq = 0;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index 6e42800fa499..c8abd3b8a6c4 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -35,6 +35,7 @@
int xs_init(void);
int xb_init_comms(void);
+void xb_deinit_comms(void);
/* Low level routines. */
int xb_write(const void *data, unsigned len);
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index 3d3be78c1093..be738c43104b 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -8,7 +8,11 @@
#include <xen/xen.h>
#include <xen/page.h>
+#include <xen/xenbus.h>
#include <xen/xenbus_dev.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <asm/xen/hypervisor.h>
#include "xenbus_comms.h"
@@ -22,6 +26,50 @@ static int xenbus_backend_open(struct inode *inode, struct file *filp)
return nonseekable_open(inode, filp);
}
+static long xenbus_alloc(domid_t domid)
+{
+ struct evtchn_alloc_unbound arg;
+ int err = -EEXIST;
+
+ xs_suspend();
+
+ /* If xenstored_ready is nonzero, that means we have already talked to
+ * xenstore and set up watches. These watches will be restored by
+ * xs_resume, but that requires communication over the port established
+ * below that is not visible to anyone until the ioctl returns.
+ *
+ * This can be resolved by splitting the ioctl into two parts
+ * (postponing the resume until xenstored is active) but this is
+ * unnecessarily complex for the intended use where xenstored is only
+ * started once - so return -EEXIST if it's already running.
+ */
+ if (xenstored_ready)
+ goto out_err;
+
+ gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid,
+ virt_to_mfn(xen_store_interface), 0 /* writable */);
+
+ arg.dom = DOMID_SELF;
+ arg.remote_dom = domid;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg);
+ if (err)
+ goto out_err;
+
+ if (xen_store_evtchn > 0)
+ xb_deinit_comms();
+
+ xen_store_evtchn = arg.port;
+
+ xs_resume();
+
+ return arg.port;
+
+ out_err:
+ xs_suspend_cancel();
+ return err;
+}
+
static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
{
if (!capable(CAP_SYS_ADMIN))
@@ -33,6 +81,9 @@ static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned l
return xen_store_evtchn;
return -ENODEV;
+ case IOCTL_XENBUS_BACKEND_SETUP:
+ return xenbus_alloc(data);
+
default:
return -ENOTTY;
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 5dfafdd1dbd3..2340f6978d6e 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -20,6 +20,7 @@
#include <linux/namei.h>
#include <linux/debugfs.h>
#include <linux/io.h>
+#include <linux/slab.h>
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -520,6 +521,133 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_blob);
+struct array_data {
+ void *array;
+ u32 elements;
+};
+
+static int u32_array_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+ return nonseekable_open(inode, file);
+}
+
+static size_t format_array(char *buf, size_t bufsize, const char *fmt,
+ u32 *array, u32 array_size)
+{
+ size_t ret = 0;
+ u32 i;
+
+ for (i = 0; i < array_size; i++) {
+ size_t len;
+
+ len = snprintf(buf, bufsize, fmt, array[i]);
+ len++; /* ' ' or '\n' */
+ ret += len;
+
+ if (buf) {
+ buf += len;
+ bufsize -= len;
+ buf[-1] = (i == array_size-1) ? '\n' : ' ';
+ }
+ }
+
+ ret++; /* \0 */
+ if (buf)
+ *buf = '\0';
+
+ return ret;
+}
+
+static char *format_array_alloc(const char *fmt, u32 *array,
+ u32 array_size)
+{
+ size_t len = format_array(NULL, 0, fmt, array, array_size);
+ char *ret;
+
+ ret = kmalloc(len, GFP_KERNEL);
+ if (ret == NULL)
+ return NULL;
+
+ format_array(ret, len, fmt, array, array_size);
+ return ret;
+}
+
+static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
+ loff_t *ppos)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct array_data *data = inode->i_private;
+ size_t size;
+
+ if (*ppos == 0) {
+ if (file->private_data) {
+ kfree(file->private_data);
+ file->private_data = NULL;
+ }
+
+ file->private_data = format_array_alloc("%u", data->array,
+ data->elements);
+ }
+
+ size = 0;
+ if (file->private_data)
+ size = strlen(file->private_data);
+
+ return simple_read_from_buffer(buf, len, ppos,
+ file->private_data, size);
+}
+
+static int u32_array_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+
+ return 0;
+}
+
+static const struct file_operations u32_array_fops = {
+ .owner = THIS_MODULE,
+ .open = u32_array_open,
+ .release = u32_array_release,
+ .read = u32_array_read,
+ .llseek = no_llseek,
+};
+
+/**
+ * debugfs_create_u32_array - create a debugfs file that is used to read u32
+ * array.
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @array: u32 array that provides data.
+ * @elements: total number of elements in the array.
+ *
+ * This function creates a file in debugfs with the given name that exports
+ * @array as data. If the @mode variable is so set it can be read from.
+ * Writing is not supported. Seek within the file is also not supported.
+ * Once array is created its size can not be changed.
+ *
+ * The function returns a pointer to dentry on success. If debugfs is not
+ * enabled in the kernel, the value -%ENODEV will be returned.
+ */
+struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+ struct dentry *parent,
+ u32 *array, u32 elements)
+{
+ struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
+
+ if (data == NULL)
+ return NULL;
+
+ data->array = array;
+ data->elements = elements;
+
+ return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
+
#ifdef CONFIG_HAS_IOMEM
/*
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index ae36b72c22f3..66c434f5dd1e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -93,6 +93,10 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
int nregs, void __iomem *base, char *prefix);
+struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+ struct dentry *parent,
+ u32 *array, u32 elements);
+
bool debugfs_initialized(void);
#else
@@ -219,6 +223,13 @@ static inline bool debugfs_initialized(void)
return false;
}
+static inline struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+ struct dentry *parent,
+ u32 *array, u32 elements)
+{
+ return ERR_PTR(-ENODEV);
+}
+
#endif
#endif
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
new file mode 100644
index 000000000000..48a9c0171b65
--- /dev/null
+++ b/include/xen/acpi.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * acpi.h
+ * acpi file for domain 0 kernel
+ *
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ * Copyright (c) 2011 Yu Ke <ke.yu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_ACPI_H
+#define _XEN_ACPI_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_XEN_DOM0
+#include <asm/xen/hypervisor.h>
+#include <xen/xen.h>
+#include <linux/acpi.h>
+
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnd);
+
+static inline void xen_acpi_sleep_register(void)
+{
+ if (xen_initial_domain())
+ acpi_os_set_prepare_sleep(
+ &xen_acpi_notify_hypervisor_state);
+}
+#else
+static inline void xen_acpi_sleep_register(void)
+{
+}
+#endif
+
+#endif /* _XEN_ACPI_H */
diff --git a/include/xen/events.h b/include/xen/events.h
index 0f773708e02c..04399b28e821 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -103,6 +103,9 @@ int xen_irq_from_pirq(unsigned pirq);
/* Return the pirq allocated to the irq. */
int xen_pirq_from_irq(unsigned irq);
+/* Return the irq allocated to the gsi */
+int xen_irq_from_gsi(unsigned gsi);
+
/* Determine whether to ignore this IRQ if it is passed to a guest. */
int xen_test_irq_shared(int irq);
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 15f8a00ff003..11e27c3af3cb 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -46,6 +46,8 @@
#include <xen/features.h>
+#define GNTTAB_RESERVED_XENSTORE 1
+
/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
#define NR_GRANT_FRAMES 4
diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h
index ac5f0fe47ed9..bbee8c6a349d 100644
--- a/include/xen/xenbus_dev.h
+++ b/include/xen/xenbus_dev.h
@@ -38,4 +38,7 @@
#define IOCTL_XENBUS_BACKEND_EVTCHN \
_IOC(_IOC_NONE, 'B', 0, 0)
+#define IOCTL_XENBUS_BACKEND_SETUP \
+ _IOC(_IOC_NONE, 'B', 1, 0)
+
#endif /* __LINUX_XEN_XENBUS_DEV_H__ */