/*
 * Page fault handler for SH with an MMU.
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (C) 2003 - 2009  Paul Mundt
 *
 * Based on linux/arch/i386/mm/fault.c:
 *  Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

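/*
 * Give any registered kprobes fault handler a chance to claim the
 * fault.  Returns non-zero if it handled it (kernel mode only).
 */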
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (mm)
		pgd = mm->pgd;
	else
		pgd = get_TTB();

	printk(KERN_ALERT "pgd = %p\n", pgd);
	pgd += pgd_index(addr);
	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			printk("(bad)");
			break;
		}

		pud = pud_offset(pgd, addr);
		if (PTRS_PER_PUD != 1)
			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
			       (u64)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
			       (u64)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			printk("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_kernel(pmd, addr);
		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2), (u64)pte_val(*pte));
	} while (0);

	printk("\n");
}

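/*
 * Copy the kernel-mode entries covering 'address' from the reference
 * page table (init_mm.pgd) into the supplied pgd.  Returns the kernel
 * pmd on success, or NULL when the reference entry is missing or the
 * tables are already in sync (in which case the fault has some other
 * cause).
 */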
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= P3SEG && address < P3_ADDR_MAX))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

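/* Addresses at or above TASK_SIZE belong to the kernel. */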
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long writeaccess,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	int fault;
	siginfo_t info;

	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
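	/*
	 * The exception vector tells us what kind of fault trapped; it
	 * is also what gets reported to the kprobes notifier below.
	 */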
	vec = lookup_exception_vector();

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (notify_page_fault(regs, vec))
			return;

		goto bad_area_nosemaphore;
	}

	if (unlikely(notify_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in an atomic region then we must not take the fault:
	 */
	if (in_atomic() || !mm)
		goto no_context;

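	/*
	 * Hold mmap_sem so the task's VMAs stay stable while we look up
	 * and handle the faulting mapping.
	 */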
	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
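	/*
	 * A non-zero writeaccess means the faulting access was a write,
	 * so the mapping must be writable; otherwise any readable,
	 * executable or writable mapping will do.
	 */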
	si_code = SEGV_ACCERR;
	if (writeaccess) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address,
				writeaccess ? FAULT_FLAG_WRITE : 0);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR) {
		tsk->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
			      regs, address);
	} else {
		tsk->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
			      regs, address);
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	if (oops_may_print()) {
		printk(KERN_ALERT
		       "Unable to handle kernel %s at virtual address %08lx\n",
		       (address < PAGE_SIZE) ? "NULL pointer dereference" :
		       "paging request", address);

		show_pte(mm, address);
	}

	die("Oops", regs, writeaccess);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
}

/*
 * Called with interrupts disabled.
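 *
 * This is the fast path for TLB misses: if the software page tables
 * already hold a valid and sufficiently permissive translation, the
 * TLB is refilled from them directly.  A non-zero return means the
 * miss could not be handled here and the full do_page_fault() path
 * has to run instead.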
 */
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
	       unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;

	/*
	 * We don't take page faults for P1, P2, and parts of P4, these
	 * are always mapped, whether it be due to legacy behaviour in
	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
	 */
	if (address >= P3SEG && address < P3_ADDR_MAX) {
		pgd = pgd_offset_k(address);
	} else {
		if (unlikely(address >= TASK_SIZE || !current->mm))
			return 1;

		pgd = pgd_offset(current->mm, address);
	}

	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
		return 1;
	pmd = pmd_offset(pud, address);
	if (pmd_none_or_clear_bad(pmd))
		return 1;
	pte = pte_offset_kernel(pmd, address);
	entry = *pte;
	if (unlikely(pte_none(entry) || pte_not_present(entry)))
		return 1;
	if (unlikely(writeaccess && !pte_write(entry)))
		return 1;

	if (writeaccess)
		entry = pte_mkdirty(entry);
	entry = pte_mkyoung(entry);

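	/*
	 * Install the updated PTE; update_mmu_cache() below then loads
	 * the new translation into the TLB.
	 */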
	set_pte(pte, entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
	/*
	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
	 * the case of an initial page write exception, so we need to
	 * flush it in order to avoid potential TLB entry duplication.
	 */
	if (writeaccess == 2)
		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

	update_mmu_cache(NULL, address, pte);

	return 0;
}