author    Alex Shi <alex.shi@linaro.org>    2017-06-28 22:13:32 +0800
committer Alex Shi <alex.shi@linaro.org>    2017-06-28 22:13:32 +0800
commit    084eedbddaa526f53b20291ee7176be269620301 (patch)
tree      ffcc23dd6b939e1db3d7feb1bf561073165af6be /arch
parent    3e71247d343d3d9e4ff996255ac58b73dcf1ce27 (diff)
parent    64e72fbc97b29298f7aa89c30fb221e1a9d02fed (diff)
Merge branch 'lsk/kdump/for-v4.9' into linux-linaro-lsk-v4.9
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/Kconfig                   |  11
-rw-r--r--  arch/arm64/configs/defconfig         |   1
-rw-r--r--  arch/arm64/include/asm/cacheflush.h  |   1
-rw-r--r--  arch/arm64/include/asm/hardirq.h     |   2
-rw-r--r--  arch/arm64/include/asm/kexec.h       |  52
-rw-r--r--  arch/arm64/include/asm/smp.h         |   3
-rw-r--r--  arch/arm64/kernel/Makefile           |   1
-rw-r--r--  arch/arm64/kernel/crash_dump.c       |  71
-rw-r--r--  arch/arm64/kernel/hibernate.c        |  10
-rw-r--r--  arch/arm64/kernel/machine_kexec.c    | 170
-rw-r--r--  arch/arm64/kernel/setup.c            |   7
-rw-r--r--  arch/arm64/kernel/smp.c              |  68
-rw-r--r--  arch/arm64/mm/init.c                 | 181
-rw-r--r--  arch/arm64/mm/mmu.c                  |  89
-rw-r--r--  arch/arm64/mm/pageattr.c             |  15
15 files changed, 620 insertions(+), 62 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a5201663373b..1af604a1e07a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -699,6 +699,17 @@ config KEXEC
but it is independent of the system firmware. And like a reboot
you can start any kernel with it, not just Linux.
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate a crash dump after being started by kexec. This should
+ normally only be set in the special crash dump kernel, which is
+ loaded from the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/kdump/kdump.txt
+
config XEN_DOM0
def_bool y
depends on XEN
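
In practice CONFIG_CRASH_DUMP is enabled in the capture kernel's configuration, and the running (main) kernel loads that image into the reserved region with kexec-tools. A typical load step, as a rough illustration (the -p/--load-panic flag is standard kexec-tools; paths and append arguments are examples only):

    kexec -p /boot/Image-capture --initrd=/boot/initrd-capture \
          --append="root=/dev/sda1 maxcpus=1 reset_devices"

After a panic, the kernel jumps into this pre-loaded image, which then exposes the old kernel's memory through /proc/vmcore.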
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index dab2cb0c1f1c..24922c9c73b8 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -79,6 +79,7 @@ CONFIG_CMA=y
CONFIG_SECCOMP=y
CONFIG_XEN=y
CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
CONFIG_CPU_IDLE=y
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 2e5fb976a572..15478762dbbd 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -149,5 +149,6 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_valid(unsigned long addr, int numpages, int enable);
#endif
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 8740297dac77..1473fc2f7ab7 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 6
+#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 04744dc5fb61..e17f0529a882 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -40,9 +40,59 @@
static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs)
{
- /* Empty routine needed to avoid build errors. */
+ if (oldregs) {
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ } else {
+ u64 tmp1, tmp2;
+
+ __asm__ __volatile__ (
+ "stp x0, x1, [%2, #16 * 0]\n"
+ "stp x2, x3, [%2, #16 * 1]\n"
+ "stp x4, x5, [%2, #16 * 2]\n"
+ "stp x6, x7, [%2, #16 * 3]\n"
+ "stp x8, x9, [%2, #16 * 4]\n"
+ "stp x10, x11, [%2, #16 * 5]\n"
+ "stp x12, x13, [%2, #16 * 6]\n"
+ "stp x14, x15, [%2, #16 * 7]\n"
+ "stp x16, x17, [%2, #16 * 8]\n"
+ "stp x18, x19, [%2, #16 * 9]\n"
+ "stp x20, x21, [%2, #16 * 10]\n"
+ "stp x22, x23, [%2, #16 * 11]\n"
+ "stp x24, x25, [%2, #16 * 12]\n"
+ "stp x26, x27, [%2, #16 * 13]\n"
+ "stp x28, x29, [%2, #16 * 14]\n"
+ "mov %0, sp\n"
+ "stp x30, %0, [%2, #16 * 15]\n"
+
+ "/* faked current PSTATE */\n"
+ "mrs %0, CurrentEL\n"
+ "mrs %1, SPSEL\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, DAIF\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, NZCV\n"
+ "orr %0, %0, %1\n"
+ /* pc */
+ "adr %1, 1f\n"
+ "1:\n"
+ "stp %1, %0, [%2, #16 * 16]\n"
+ : "=&r" (tmp1), "=&r" (tmp2)
+ : "r" (newregs)
+ : "memory"
+ );
+ }
}
+#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+extern bool crash_is_nosave(unsigned long pfn);
+extern void crash_prepare_suspend(void);
+extern void crash_post_resume(void);
+#else
+static inline bool crash_is_nosave(unsigned long pfn) { return false; }
+static inline void crash_prepare_suspend(void) {}
+static inline void crash_post_resume(void) {}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
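
The "faked" PSTATE built by the inline assembly above simply ORs together four system registers whose fields occupy disjoint bit positions, so no shifting or masking is needed. A rough C restatement of the same idea (illustrative sketch only, not part of the patch; read_sysreg() is the kernel's mrs wrapper):

    /*
     * Sketch: compose a PSTATE value the way crash_setup_regs() does.
     * CurrentEL supplies bits [3:2], SPSel bit [0], DAIF bits [9:6]
     * and NZCV bits [31:28]; the fields do not overlap, so plain ORs
     * are sufficient.
     */
    static inline u64 fake_pstate(void)
    {
        return read_sysreg(CurrentEL) |
               read_sysreg(SPSel) |
               read_sysreg(daif) |
               read_sysreg(nzcv);
    }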
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d050d720a1b4..55f08c5acfad 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -148,6 +148,9 @@ static inline void cpu_panic_kernel(void)
*/
bool cpus_are_stuck_in_kernel(void);
+extern void smp_send_crash_stop(void);
+extern bool smp_crash_stop_failed(void);
+
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d66bbaafc0c..6a7384eee08d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,6 +50,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 000000000000..f46d57c31443
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into the buffer
+ * pointed to by @buf. If @buf is in user space, set @userbuf to %1. Returns
+ * the number of bytes copied, or a negative error code on failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset,
+ int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+ memunmap(vaddr);
+ return -EFAULT;
+ }
+ } else {
+ memcpy(buf, vaddr + offset, csize);
+ }
+
+ memunmap(vaddr);
+
+ return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @count: number of bytes to read
+ * @ppos: physical address to read from
+ *
+ * This function reads @count bytes from the ELF core header, which
+ * resides in the crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+ return count;
+}
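
copy_oldmem_page() is the architecture hook behind reads of /proc/vmcore. A simplified sketch of how such a caller consumes it page by page (hypothetical helper for illustration; the real loop lives in fs/proc/vmcore.c):

    /* Illustrative only: copy an arbitrary physical range of the old
     * kernel by splitting it into per-page calls to copy_oldmem_page().
     */
    static ssize_t read_oldmem_range(char *buf, size_t count, u64 paddr,
                                     int userbuf)
    {
        size_t done = 0;

        while (count) {
            unsigned long pfn = paddr >> PAGE_SHIFT;
            unsigned long off = paddr & (PAGE_SIZE - 1);
            size_t chunk = min_t(size_t, count, PAGE_SIZE - off);
            ssize_t ret;

            ret = copy_oldmem_page(pfn, buf + done, chunk, off, userbuf);
            if (ret < 0)
                return ret;

            paddr += chunk;
            done += chunk;
            count -= chunk;
        }

        return done;
    }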
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index d55a7b09959b..4b10dc11e910 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -28,6 +28,7 @@
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/irqflags.h>
+#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
@@ -105,7 +106,8 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
- return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+ return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
+ crash_is_nosave(pfn);
}
void notrace save_processor_state(void)
@@ -289,6 +291,9 @@ int swsusp_arch_suspend(void)
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
+ /* make the crash dump kernel image visible/saveable */
+ crash_prepare_suspend();
+
sleep_cpu = smp_processor_id();
ret = swsusp_save();
} else {
@@ -300,6 +305,9 @@ int swsusp_arch_suspend(void)
if (el2_reset_needed())
dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
+ /* make the crash dump kernel image protected again */
+ crash_post_resume();
+
/*
* Tell the hibernation core that we've just restored
* the memory
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index bc96c8a7fc79..481f54a866c5 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,12 +9,19 @@
* published by the Free Software Foundation.
*/
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <linux/page-flags.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include <asm/page.h>
#include "cpu-reset.h"
@@ -22,8 +29,6 @@
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;
-static unsigned long kimage_start;
-
/**
* kexec_image_info - For debugging output.
*/
@@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
- kimage_start = kimage->start;
-
kexec_image_info(kimage);
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
@@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage)
{
phys_addr_t reboot_code_buffer_phys;
void *reboot_code_buffer;
+ bool in_kexec_crash = (kimage == kexec_crash_image);
+ bool stuck_cpus = cpus_are_stuck_in_kernel();
/*
* New cpus may have become stuck_in_kernel after we loaded the image.
*/
- BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
+ BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
+ WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
+ "Some CPUs may be stale, kdump will be unreliable.\n");
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
@@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage)
kexec_list_flush(kimage);
/* Flush the new image if already in place. */
- if (kimage->head & IND_DONE)
+ if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
kexec_segment_flush(kimage);
pr_info("Bye!\n");
@@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage)
* relocation is complete.
*/
- cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
- kimage_start, 0);
+ cpu_soft_restart(kimage != kexec_crash_image,
+ reboot_code_buffer_phys, kimage->head, kimage->start, 0);
BUG(); /* Should never get here. */
}
+static void machine_kexec_mask_interrupts(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int ret;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ /*
+ * First try to remove the active state. If this
+ * fails, try to EOI the interrupt.
+ */
+ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+ chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
void machine_crash_shutdown(struct pt_regs *regs)
{
- /* Empty routine needed to avoid build errors. */
+ local_irq_disable();
+
+ /* shutdown non-crashing cpus */
+ smp_send_crash_stop();
+
+ /* for crashing cpu */
+ crash_save_cpu(regs, smp_processor_id());
+ machine_kexec_mask_interrupts();
+
+ pr_info("Starting crashdump kernel...\n");
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+ int i;
+
+ kexec_segment_flush(kexec_crash_image);
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ int i;
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
+
+#ifdef CONFIG_HIBERNATION
+/*
+ * To preserve the crash dump kernel image, the relevant memory segments
+ * must be mapped again around hibernation.
+ */
+void crash_prepare_suspend(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+}
+
+void crash_post_resume(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_protect_crashkres();
+}
+
+/*
+ * crash_is_nosave
+ *
+ * Return true only if a page is part of the memory reserved for the crash
+ * dump kernel but does not hold any data of the loaded kernel image.
+ *
+ * Note that all the pages in crash dump kernel memory have been initially
+ * marked as Reserved in kexec_reserve_crashkres_pages().
+ *
+ * In hibernation, the pages which are Reserved and yet "nosave" are excluded
+ * from the hibernation image. crash_is_nosave() does this check for the
+ * crash dump kernel's memory and thereby reduces the total size of the
+ * hibernation image.
+ */
+
+bool crash_is_nosave(unsigned long pfn)
+{
+ int i;
+ phys_addr_t addr;
+
+ if (!crashk_res.end)
+ return false;
+
+ /* in reserved memory? */
+ addr = __pfn_to_phys(pfn);
+ if ((addr < crashk_res.start) || (crashk_res.end < addr))
+ return false;
+
+ if (!kexec_crash_image)
+ return true;
+
+ /* not part of loaded kernel image? */
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ if (addr >= kexec_crash_image->segment[i].mem &&
+ addr < (kexec_crash_image->segment[i].mem +
+ kexec_crash_image->segment[i].memsz))
+ return false;
+
+ return true;
+}
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ page = phys_to_page(addr);
+ ClearPageReserved(page);
+ free_reserved_page(page);
+ }
+}
+#endif /* CONFIG_HIBERNATION */
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_NUMBER(VA_BITS);
+ /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+ vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
+ kimage_voffset);
+ vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
+ PHYS_OFFSET);
}
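
The NUMBER(kimage_voffset) and NUMBER(PHYS_OFFSET) vmcoreinfo entries exist so that post-mortem tools (crash, makedumpfile) can translate arm64 kernel virtual addresses found in the dump. A sketch of the two translations they enable, written as user-space helpers (illustrative, not kernel code):

    #include <stdint.h>

    /* Kernel-image addresses (text/data): virt = phys + kimage_voffset. */
    static uint64_t kimage_virt_to_phys(uint64_t virt, uint64_t kimage_voffset)
    {
        return virt - kimage_voffset;
    }

    /* Linear-map addresses: virt = phys - PHYS_OFFSET + PAGE_OFFSET. */
    static uint64_t linear_virt_to_phys(uint64_t virt, uint64_t page_offset,
                                        uint64_t phys_offset)
    {
        return virt - page_offset + phys_offset;
    }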
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a53f52ac81c6..e4e2a5af81c4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
-#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
@@ -224,6 +223,12 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, &kernel_data);
+#ifdef CONFIG_KEXEC_CORE
+ /* Userspace will find the "Crash kernel" region in /proc/iomem. */
+ if (crashk_res.end && crashk_res.start >= res->start &&
+ crashk_res.end <= res->end)
+ request_resource(res, &crashk_res);
+#endif
}
}
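
Publishing crashk_res here is what allows kexec-tools to find the reserved window at load time: it appears in /proc/iomem nested under its parent System RAM resource, roughly as follows (addresses illustrative):

    40000000-bfffffff : System RAM
      60000000-7fffffff : Crash kernel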
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index cb87234cfcf2..11f28909a806 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
+#include <linux/kexec.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -74,6 +75,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP
@@ -753,6 +755,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
S(IPI_WAKEUP, "CPU wake-up interrupts"),
@@ -827,6 +830,29 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+#endif
+
+static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+#ifdef CONFIG_KEXEC_CORE
+ crash_save_cpu(regs, cpu);
+
+ atomic_dec(&waiting_for_crash_ipi);
+
+ local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+
+ /* just in case */
+ cpu_park_loop();
+#endif
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -857,6 +883,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_CPU_CRASH_STOP:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ irq_enter();
+ ipi_cpu_crash_stop(cpu, regs);
+
+ unreachable();
+ }
+ break;
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
irq_enter();
@@ -929,6 +964,39 @@ void smp_send_stop(void)
cpumask_pr_args(cpu_online_mask));
}
+#ifdef CONFIG_KEXEC_CORE
+void smp_send_crash_stop(void)
+{
+ cpumask_t mask;
+ unsigned long timeout;
+
+ if (num_online_cpus() == 1)
+ return;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+
+ pr_crit("SMP: stopping secondary CPUs\n");
+ smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
+
+ /* Wait up to one second for other CPUs to stop */
+ timeout = USEC_PER_SEC;
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+ udelay(1);
+
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+ return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
/*
* not supported here
*/
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 380ebe705093..4036d30975dd 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,12 +30,15 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -76,6 +79,142 @@ static int __init early_initrd(char *p)
early_param("initrd", early_initrd);
#endif
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves the memory area given by the "crashkernel=" kernel
+ * command line parameter. The reserved memory is used by the dump capture
+ * kernel when the primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long crash_base, crash_size;
+ int ret;
+
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ /* no crashkernel= or invalid value specified */
+ if (ret || !crash_size)
+ return;
+
+ crash_size = PAGE_ALIGN(crash_size);
+
+ if (crash_base == 0) {
+ /* Current arm64 boot protocol requires 2MB alignment */
+ crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+ crash_size, SZ_2M);
+ if (crash_base == 0) {
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+ crash_size);
+ return;
+ }
+ } else {
+ /* User specifies base address explicitly. */
+ if (!memblock_is_region_memory(crash_base, crash_size)) {
+ pr_warn("cannot reserve crashkernel: region is not memory\n");
+ return;
+ }
+
+ if (memblock_is_region_reserved(crash_base, crash_size)) {
+ pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
+ return;
+ }
+
+ if (!IS_ALIGNED(crash_base, SZ_2M)) {
+ pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
+ return;
+ }
+ }
+ memblock_reserve(crash_base, crash_size);
+
+ pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+#ifdef CONFIG_HIBERNATION
+ phys_addr_t addr;
+ struct page *page;
+
+ if (!crashk_res.end)
+ return;
+
+ /*
+ * To reduce the size of the hibernation image, all the pages are
+ * marked as Reserved initially.
+ */
+ for (addr = crashk_res.start; addr < (crashk_res.end + 1);
+ addr += PAGE_SIZE) {
+ page = phys_to_page(addr);
+ SetPageReserved(page);
+ }
+#endif
+}
+#else
+static void __init reserve_crashkernel(void)
+{
+}
+
+static void __init kexec_reserve_crashkres_pages(void)
+{
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_DUMP
+static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ const __be32 *reg;
+ int len;
+
+ if (depth != 1 || strcmp(uname, "chosen") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
+ if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+ return 1;
+
+ elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
+ elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ return 1;
+}
+
+/*
+ * reserve_elfcorehdr() - reserves memory for elf core header
+ *
+ * This function reserves the memory occupied by an ELF core header
+ * described in the device tree. This region contains all the
+ * information about the primary kernel's core image, and is used by a
+ * dump capture kernel to access the system memory of the primary kernel.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+ of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
+
+ if (!elfcorehdr_size)
+ return;
+
+ if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+ pr_warn("elfcorehdr is overlapped\n");
+ return;
+ }
+
+ memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+ pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
+ elfcorehdr_size >> 10, elfcorehdr_addr);
+}
+#else
+static void __init reserve_elfcorehdr(void)
+{
+}
+#endif /* CONFIG_CRASH_DUMP */
/*
* Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
* currently assumes that for memory starting above 4G, 32-bit devices will
@@ -187,10 +326,45 @@ static int __init early_mem(char *p)
}
early_param("mem", early_mem);
+static int __init early_init_dt_scan_usablemem(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ struct memblock_region *usablemem = data;
+ const __be32 *reg;
+ int len;
+
+ if (depth != 1 || strcmp(uname, "chosen") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
+ if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
+ return 1;
+
+ usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+ usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ return 1;
+}
+
+static void __init fdt_enforce_memory_region(void)
+{
+ struct memblock_region reg = {
+ .size = 0,
+ };
+
+ of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
+
+ if (reg.size)
+ memblock_cap_memory_range(reg.base, reg.size);
+}
+
void __init arm64_memblock_init(void)
{
const s64 linear_region_size = -(s64)PAGE_OFFSET;
+ /* Handle linux,usable-memory-range property */
+ fdt_enforce_memory_region();
+
/*
* Ensure that the linear region takes up exactly half of the kernel
* virtual address space. This way, we can distinguish a linear address
@@ -296,6 +470,11 @@ void __init arm64_memblock_init(void)
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
+
+ reserve_crashkernel();
+
+ reserve_elfcorehdr();
+
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -415,6 +594,8 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
free_all_bootmem();
+ kexec_reserve_crashkres_pages();
+
mem_init_print_info(NULL);
#define MLK(b, t) b, t, ((t) - (b)) >> 10
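
Both reservations above are driven from the boot environment. The main kernel parses the standard crashkernel= syntax, e.g. crashkernel=512M (the kernel picks a 2MB-aligned base) or crashkernel=512M@0x60000000 (fixed base, which must itself be 2MB aligned). The capture kernel, in turn, learns its usable memory window and the ELF core header location from /chosen properties that kexec-tools fills in; an illustrative device-tree fragment (cell values are examples only):

    /chosen {
            linux,usable-memory-range = <0x0 0x60000000 0x0 0x20000000>;
            linux,elfcorehdr = <0x0 0x7ff00000 0x0 0x10000>;
    };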
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 05615a3fdc6f..b6c838e9fa4d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -22,6 +22,8 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
@@ -317,56 +319,32 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
NULL, !debug_pagealloc_enabled());
}
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+ phys_addr_t end, pgprot_t prot,
+ bool allow_block_mappings)
+{
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+ prot, early_pgtable_alloc, allow_block_mappings);
+}
+
+static void __init map_mem(pgd_t *pgd)
{
unsigned long kernel_start = __pa(_text);
unsigned long kernel_end = __pa(__init_begin);
+ struct memblock_region *reg;
/*
* Take care not to create a writable alias for the
* read-only text and rodata sections of the kernel image.
+ * So temporarily mark them as NOMAP to skip them in
+ * the for-loop below.
*/
-
- /* No overlap with the kernel text/rodata */
- if (end < kernel_start || start >= kernel_end) {
- __create_pgd_mapping(pgd, start, __phys_to_virt(start),
- end - start, PAGE_KERNEL,
- early_pgtable_alloc,
- !debug_pagealloc_enabled());
- return;
- }
-
- /*
- * This block overlaps the kernel text/rodata mappings.
- * Map the portion(s) which don't overlap.
- */
- if (start < kernel_start)
- __create_pgd_mapping(pgd, start,
- __phys_to_virt(start),
- kernel_start - start, PAGE_KERNEL,
- early_pgtable_alloc,
- !debug_pagealloc_enabled());
- if (kernel_end < end)
- __create_pgd_mapping(pgd, kernel_end,
- __phys_to_virt(kernel_end),
- end - kernel_end, PAGE_KERNEL,
- early_pgtable_alloc,
- !debug_pagealloc_enabled());
-
- /*
- * Map the linear alias of the [_text, __init_begin) interval as
- * read-only/non-executable. This makes the contents of the
- * region accessible to subsystems such as hibernate, but
- * protects it from inadvertent modification or execution.
- */
- __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
- kernel_end - kernel_start, PAGE_KERNEL_RO,
- early_pgtable_alloc, !debug_pagealloc_enabled());
-}
-
-static void __init map_mem(pgd_t *pgd)
-{
- struct memblock_region *reg;
+ memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+#endif
/* map all the memory banks */
for_each_memblock(memory, reg) {
@@ -378,8 +356,33 @@ static void __init map_mem(pgd_t *pgd)
if (memblock_is_nomap(reg))
continue;
- __map_memblock(pgd, start, end);
+ __map_memblock(pgd, start, end,
+ PAGE_KERNEL, !debug_pagealloc_enabled());
+ }
+
+ /*
+ * Map the linear alias of the [_text, __init_begin) interval as
+ * read-only/non-executable. This makes the contents of the
+ * region accessible to subsystems such as hibernate, but
+ * protects it from inadvertent modification or execution.
+ */
+ __map_memblock(pgd, kernel_start, kernel_end,
+ PAGE_KERNEL_RO, !debug_pagealloc_enabled());
+ memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * at page granularity and return unused memory to the buddy
+ * system through the /sys/kernel/kexec_crash_size interface.
+ */
+ if (crashk_res.end) {
+ __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+ PAGE_KERNEL, false);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
}
+#endif
}
void mark_rodata_ro(void)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 8def55e7249b..3212ee0558f6 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -125,20 +125,23 @@ int set_memory_x(unsigned long addr, int numpages)
}
EXPORT_SYMBOL_GPL(set_memory_x);
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void __kernel_map_pages(struct page *page, int numpages, int enable)
+int set_memory_valid(unsigned long addr, int numpages, int enable)
{
- unsigned long addr = (unsigned long) page_address(page);
-
if (enable)
- __change_memory_common(addr, PAGE_SIZE * numpages,
+ return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(PTE_VALID),
__pgprot(0));
else
- __change_memory_common(addr, PAGE_SIZE * numpages,
+ return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(0),
__pgprot(PTE_VALID));
}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ set_memory_valid((unsigned long)page_address(page), numpages, enable);
+}
#ifdef CONFIG_HIBERNATION
/*
* When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function