aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/mce_power.c
diff options
context:
space:
mode:
authorBalbir Singh <bsingharora@gmail.com>2017-09-29 14:26:53 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2017-10-16 23:12:01 +1100
commitba41e1e1ccb9771ce41a3b8e2121f95486e76ac9 (patch)
treea6ca6c4b6e2cd1ce5eabf605b9ec2884fbf912de /arch/powerpc/kernel/mce_power.c
parent81b61fa7a065e639d097b09c303e3212e4264fac (diff)
powerpc/mce: Hookup derror (load/store) UE errors
Extract physical_address for UE errors by walking the page tables for the mm and address at the NIP, to extract the instruction. Then use the instruction to find the effective address via analyse_instr(). We might have page table walking races, but we expect them to be rare, the physical address extraction is best effort. The idea is to then hook up this infrastructure to memory failure eventually. Signed-off-by: Balbir Singh <bsingharora@gmail.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/mce_power.c')
-rw-r--r--arch/powerpc/kernel/mce_power.c87
1 files changed, 82 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index d37e612050b5..e61b80e37e1f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,36 @@
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, we could potentially race with page table updates.
+ */
+static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+{
+ pte_t *ptep;
+ unsigned long flags;
+ struct mm_struct *mm;
+
+ if (user_mode(regs))
+ mm = current->mm;
+ else
+ mm = &init_mm;
+
+ local_irq_save(flags);
+ if (mm == current->mm)
+ ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+ else
+ ptep = find_init_mm_pte(addr, NULL);
+ local_irq_restore(flags);
+ if (!ptep || pte_special(*ptep))
+ return ULONG_MAX;
+ return pte_pfn(*ptep);
+}
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
@@ -421,6 +451,41 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
+static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+ uint64_t *phys_addr)
+{
+ /*
+ * Carefully look at the NIP to determine
+ * the instruction to analyse. Reading the NIP
+ * in real-mode is tricky and can lead to recursive
+ * faults
+ */
+ int instr;
+ unsigned long pfn, instr_addr;
+ struct instruction_op op;
+ struct pt_regs tmp = *regs;
+
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+ instr = *(unsigned int *)(instr_addr);
+ if (!analyse_instr(&op, &tmp, instr)) {
+ pfn = addr_to_pfn(regs, op.ea);
+ *addr = op.ea;
+ *phys_addr = (pfn << PAGE_SHIFT);
+ return 0;
+ }
+ /*
+ * analyse_instr() might fail if the instruction
+ * is not a load/store, although this is unexpected
+ * for load/store errors or if we got the NIP
+ * wrong
+ */
+ }
+ *addr = 0;
+ return -1;
+}
+
static int mce_handle_ierror(struct pt_regs *regs,
const struct mce_ierror_table table[],
struct mce_error_info *mce_err, uint64_t *addr)
@@ -489,7 +554,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
static int mce_handle_derror(struct pt_regs *regs,
const struct mce_derror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t dsisr = regs->dsisr;
int handled = 0;
@@ -555,7 +621,17 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
-
+ else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ /*
+ * We do a maximum of 4 nested MCE calls, see
+ * kernel/exception-64s.h
+ */
+ if (get_paca()->in_mce < MAX_MCE_DEPTH)
+ if (!mce_find_instr_ea_and_pfn(regs, addr,
+ phys_addr))
+ handled = 1;
+ }
found = 1;
}
@@ -592,19 +668,20 @@ static long mce_handle_error(struct pt_regs *regs,
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
- uint64_t addr;
+ uint64_t addr, phys_addr;
uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
- handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
+ &phys_addr);
else
handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_err, regs->nip, addr);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
return handled;
}