aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/platform/efi/efi.c
diff options
context:
space:
mode:
authorBorislav Petkov <bp@suse.de>2014-01-18 12:48:17 +0100
committerMatt Fleming <matt.fleming@intel.com>2014-03-04 16:17:18 +0000
commitb7b898ae0c0a82489511a1ce1b35f26215e6beb5 (patch)
tree79167e1a9fe59bf3e4b55772ccd294bf9cad2b11 /arch/x86/platform/efi/efi.c
parent42a5477251f0e0f33ad5f6a95c48d685ec03191e (diff)
x86/efi: Make efi virtual runtime map passing more robust
Currently, running SetVirtualAddressMap() and passing the physical address of the virtual map array was working only by a lucky coincidence because the memory was present in the EFI page table too. Until Toshi went and booted this on a big HP box - the krealloc() manner of resizing the memmap we're doing did allocate from such physical addresses which were not mapped anymore and boom: http://lkml.kernel.org/r/1386806463.1791.295.camel@misato.fc.hp.com One way to take care of that issue is to reimplement the krealloc thing but with pages. We start with contiguous pages of order 1, i.e. 2 pages, and when we deplete that memory (shouldn't happen all that often but you know firmware) we realloc the next power-of-two pages. Having the pages, it is much more handy and easy to map them into the EFI page table with the already existing mapping code which we're using for building the virtual mappings. Thanks to Toshi Kani and Matt for the great debugging help. Reported-by: Toshi Kani <toshi.kani@hp.com> Signed-off-by: Borislav Petkov <bp@suse.de> Tested-by: Toshi Kani <toshi.kani@hp.com> Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Diffstat (limited to 'arch/x86/platform/efi/efi.c')
-rw-r--r--arch/x86/platform/efi/efi.c99
1 files changed, 78 insertions, 21 deletions
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37f20d71ec4b..576bb126593a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -926,14 +926,36 @@ static void __init efi_map_regions_fixed(void)
}
+static void *realloc_pages(void *old_memmap, int old_shift)
+{
+ void *ret;
+
+ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
+ if (!ret)
+ goto out;
+
+ /*
+ * A first-time allocation doesn't have anything to copy.
+ */
+ if (!old_memmap)
+ return ret;
+
+ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
+
+out:
+ free_pages((unsigned long)old_memmap, old_shift);
+ return ret;
+}
+
/*
- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
- * addresses.
+ * Map the efi memory ranges of the runtime services and update new_mmap with
+ * virtual addresses.
*/
-static void * __init efi_map_regions(int *count)
+static void * __init efi_map_regions(int *count, int *pg_shift)
{
+ void *p, *new_memmap = NULL;
+ unsigned long left = 0;
efi_memory_desc_t *md;
- void *p, *tmp, *new_memmap = NULL;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
@@ -948,20 +970,23 @@ static void * __init efi_map_regions(int *count)
efi_map_region(md);
get_systab_virt_addr(md);
- tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
- GFP_KERNEL);
- if (!tmp)
- goto out;
- new_memmap = tmp;
+ if (left < memmap.desc_size) {
+ new_memmap = realloc_pages(new_memmap, *pg_shift);
+ if (!new_memmap)
+ return NULL;
+
+ left += PAGE_SIZE << *pg_shift;
+ (*pg_shift)++;
+ }
+
memcpy(new_memmap + (*count * memmap.desc_size), md,
memmap.desc_size);
+
+ left -= memmap.desc_size;
(*count)++;
}
return new_memmap;
-out:
- kfree(new_memmap);
- return NULL;
}
/*
@@ -987,9 +1012,9 @@ out:
*/
void __init efi_enter_virtual_mode(void)
{
- efi_status_t status;
+ int err, count = 0, pg_shift = 0;
void *new_memmap = NULL;
- int err, count = 0;
+ efi_status_t status;
efi.systab = NULL;
@@ -1006,20 +1031,24 @@ void __init efi_enter_virtual_mode(void)
efi_map_regions_fixed();
} else {
efi_merge_regions();
- new_memmap = efi_map_regions(&count);
+ new_memmap = efi_map_regions(&count, &pg_shift);
if (!new_memmap) {
pr_err("Error reallocating memory, EFI runtime non-functional!\n");
return;
}
- }
- err = save_runtime_map();
- if (err)
- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+ err = save_runtime_map();
+ if (err)
+ pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+ }
BUG_ON(!efi.systab);
- efi_setup_page_tables();
+ if (!efi_setup) {
+ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+ return;
+ }
+
efi_sync_low_kernel_mappings();
efi_dump_pagetable();
@@ -1060,7 +1089,35 @@ void __init efi_enter_virtual_mode(void)
efi_runtime_mkexec();
- kfree(new_memmap);
+
+ /*
+ * We mapped the descriptor array into the EFI pagetable above but we're
+ * not unmapping it here. Here's why:
+ *
+ * We're copying select PGDs from the kernel page table to the EFI page
+ * table and when we do so and make changes to those PGDs like unmapping
+ * stuff from them, those changes appear in the kernel page table and we
+ * go boom.
+ *
+ * From setup_real_mode():
+ *
+ * ...
+ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+ *
+ * In this particular case, our allocation is in PGD 0 of the EFI page
+ * table but we've copied that PGD from PGD[272] of the EFI page table:
+ *
+ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
+ *
+ * where the direct memory mapping in kernel space is.
+ *
+ * new_memmap's VA comes from that direct mapping and thus clearing it,
+ * it would get cleared in the kernel page table too.
+ *
+ * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
+ */
+ if (!efi_setup)
+ free_pages((unsigned long)new_memmap, pg_shift);
/* clean DUMMY object */
efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,