Diffstat (limited to 'arch/arm/mm')
-rw-r--r--  arch/arm/mm/Kconfig            |   4
-rw-r--r--  arch/arm/mm/alignment.c        |   6
-rw-r--r--  arch/arm/mm/cache-aurora-l2.h  |  55
-rw-r--r--  arch/arm/mm/cache-l2x0.c       | 278
-rw-r--r--  arch/arm/mm/cache-v7.S         |   6
-rw-r--r--  arch/arm/mm/context.c          | 207
-rw-r--r--  arch/arm/mm/dma-mapping.c      |  47
-rw-r--r--  arch/arm/mm/idmap.c            |  14
-rw-r--r--  arch/arm/mm/ioremap.c          |  16
-rw-r--r--  arch/arm/mm/mmap.c             | 134
-rw-r--r--  arch/arm/mm/mmu.c              |  18
-rw-r--r--  arch/arm/mm/proc-macros.S      |   4
-rw-r--r--  arch/arm/mm/proc-v6.S          |   2
-rw-r--r--  arch/arm/mm/proc-v7-2level.S   |  10
-rw-r--r--  arch/arm/mm/proc-v7-3level.S   |   5
-rw-r--r--  arch/arm/mm/proc-v7.S          |  69
-rw-r--r--  arch/arm/mm/vmregion.h         |   1
17 files changed, 603 insertions(+), 273 deletions(-)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 94186b6c685..3fd629d5a51 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -352,6 +352,10 @@ config CPU_PJ4
select ARM_THUMBEE
select CPU_V7
+config CPU_PJ4B
+ bool
+ select CPU_V7
+
# ARMv6
config CPU_V6
bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b9f60ebe3bc..b820edaf318 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -745,7 +745,7 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
static int
do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
- union offset_union offset;
+ union offset_union uninitialized_var(offset);
unsigned long instr = 0, instrptr;
int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
unsigned int type;
@@ -856,8 +856,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (thumb2_32b) {
offset.un = 0;
handler = do_alignment_t32_to_handler(&instr, regs, &offset);
- } else
+ } else {
+ offset.un = 0;
handler = do_alignment_ldmstm;
+ }
break;
default:
diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h
new file mode 100644
index 00000000000..c8612476983
--- /dev/null
+++ b/arch/arm/mm/cache-aurora-l2.h
@@ -0,0 +1,55 @@
+/*
+ * AURORA shared L2 cache controller support
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H
+#define __ASM_ARM_HARDWARE_AURORA_L2_H
+
+#define AURORA_SYNC_REG 0x700
+#define AURORA_RANGE_BASE_ADDR_REG 0x720
+#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0
+#define AURORA_INVAL_RANGE_REG 0x774
+#define AURORA_CLEAN_RANGE_REG 0x7b4
+#define AURORA_FLUSH_RANGE_REG 0x7f4
+
+#define AURORA_ACR_REPLACEMENT_OFFSET 27
+#define AURORA_ACR_REPLACEMENT_MASK \
+ (0x3 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \
+ (0 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \
+ (1 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \
+ (3 << AURORA_ACR_REPLACEMENT_OFFSET)
+
+#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0
+#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \
+ (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \
+ (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \
+ (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \
+ (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+
+#define MAX_RANGE_SIZE 1024
+
+#define AURORA_WAY_SIZE_SHIFT 2
+
+#define AURORA_CTRL_FW 0x100
+
+/* choose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make
+ * the distinction between a number coming from hardware and a number
+ * coming from the device tree */
+#define AURORA_CACHE_ID 0x100
+
+#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */
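The ACR fields defined above are read-modify-write fields of the Aurora
auxiliary control register: the mask selects the field, and the value is
then ORed in. A minimal standalone sketch of how a mask/value pair is
applied (plain C with shortened names and a hypothetical reset value, not
kernel code):

#include <stdint.h>
#include <stdio.h>

#define ACR_REPLACEMENT_OFFSET		27
#define ACR_REPLACEMENT_MASK		(0x3u << ACR_REPLACEMENT_OFFSET)
#define ACR_REPLACEMENT_SEMIPLRU	(3u << ACR_REPLACEMENT_OFFSET)
#define ACR_FORCE_WRITE_POLICY_MASK	0x3u
#define ACR_FORCE_WRITE_THRO_POLICY	2u

int main(void)
{
	uint32_t aux = 0x02020000;	/* hypothetical reset value */
	/* semi-pLRU replacement plus forced write-through */
	uint32_t val = ACR_REPLACEMENT_SEMIPLRU | ACR_FORCE_WRITE_THRO_POLICY;
	uint32_t mask = ACR_REPLACEMENT_MASK | ACR_FORCE_WRITE_POLICY_MASK;

	aux = (aux & ~mask) | val;	/* clear the fields, then set them */
	printf("AUX_CTRL = 0x%08x\n", aux);
	return 0;
}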
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 8a97e6443c6..6911b8b2745 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
+#include "cache-aurora-l2.h"
#define CACHE_LINE_SIZE 32
@@ -34,14 +35,20 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
+/* Aurora doesn't have the cache ID register available, so we have to
+ * pass it through the device tree */
+static u32 cache_id_part_number_from_dt;
+
struct l2x0_regs l2x0_saved_regs;
struct l2x0_of_data {
void (*setup)(const struct device_node *, u32 *, u32 *);
void (*save)(void);
- void (*resume)(void);
+ struct outer_cache_fns outer_cache;
};
+static bool of_init = false;
+
static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
{
/* wait for cache operation by line or way to complete */
@@ -168,7 +175,7 @@ static void l2x0_inv_all(void)
/* invalidate all ways */
raw_spin_lock_irqsave(&l2x0_lock, flags);
/* Invalidating when L2 is enabled is a no-no */
- BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1);
+ BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
cache_sync();
@@ -292,11 +299,18 @@ static void l2x0_unlock(u32 cache_id)
int lockregs;
int i;
- if (cache_id == L2X0_CACHE_ID_PART_L310)
+ switch (cache_id) {
+ case L2X0_CACHE_ID_PART_L310:
lockregs = 8;
- else
+ break;
+ case AURORA_CACHE_ID:
+ lockregs = 4;
+ break;
+ default:
/* L210 and unknown types */
lockregs = 1;
+ break;
+ }
for (i = 0; i < lockregs; i++) {
writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
@@ -312,18 +326,22 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
u32 cache_id;
u32 way_size = 0;
int ways;
+ int way_size_shift = L2X0_WAY_SIZE_SHIFT;
const char *type;
l2x0_base = base;
-
- cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+ if (cache_id_part_number_from_dt)
+ cache_id = cache_id_part_number_from_dt;
+ else
+ cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID)
+ & L2X0_CACHE_ID_PART_MASK;
aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
aux &= aux_mask;
aux |= aux_val;
/* Determine the number of ways */
- switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+ switch (cache_id) {
case L2X0_CACHE_ID_PART_L310:
if (aux & (1 << 16))
ways = 16;
@@ -340,6 +358,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
ways = (aux >> 13) & 0xf;
type = "L210";
break;
+
+ case AURORA_CACHE_ID:
+ sync_reg_offset = AURORA_SYNC_REG;
+ ways = (aux >> 13) & 0xf;
+ ways = 2 << ((ways + 1) >> 2);
+ way_size_shift = AURORA_WAY_SIZE_SHIFT;
+ type = "Aurora";
+ break;
default:
/* Assume unknown chips have 8 ways */
ways = 8;
@@ -353,7 +379,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
* L2 cache Size = Way size * Number of ways
*/
way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
- way_size = 1 << (way_size + 3);
+ way_size = 1 << (way_size + way_size_shift);
+
l2x0_size = ways * way_size * SZ_1K;
/*
@@ -361,7 +388,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
* If you are booting from non-secure mode
* accessing the below registers will fault.
*/
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* Make sure that I&D is not locked down when starting */
l2x0_unlock(cache_id);
@@ -371,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
l2x0_inv_all();
/* enable L2X0 */
- writel_relaxed(1, l2x0_base + L2X0_CTRL);
+ writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
}
/* Re-read it in case some bits are reserved. */
@@ -380,13 +407,15 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
/* Save the value for resuming. */
l2x0_saved_regs.aux_ctrl = aux;
- outer_cache.inv_range = l2x0_inv_range;
- outer_cache.clean_range = l2x0_clean_range;
- outer_cache.flush_range = l2x0_flush_range;
- outer_cache.sync = l2x0_cache_sync;
- outer_cache.flush_all = l2x0_flush_all;
- outer_cache.inv_all = l2x0_inv_all;
- outer_cache.disable = l2x0_disable;
+ if (!of_init) {
+ outer_cache.inv_range = l2x0_inv_range;
+ outer_cache.clean_range = l2x0_clean_range;
+ outer_cache.flush_range = l2x0_flush_range;
+ outer_cache.sync = l2x0_cache_sync;
+ outer_cache.flush_all = l2x0_flush_all;
+ outer_cache.inv_all = l2x0_inv_all;
+ outer_cache.disable = l2x0_disable;
+ }
printk(KERN_INFO "%s cache controller enabled\n", type);
printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
@@ -394,6 +423,100 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
}
#ifdef CONFIG_OF
+static int l2_wt_override;
+
+/*
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+ /*
+ * Limit the number of cache lines processed at once,
+ * since cache range operations stall the CPU pipeline
+ * until completion.
+ */
+ if (end > start + MAX_RANGE_SIZE)
+ end = start + MAX_RANGE_SIZE;
+
+ /*
+ * Cache range operations can't straddle a page boundary.
+ */
+ if (end > PAGE_ALIGN(start+1))
+ end = PAGE_ALIGN(start+1);
+
+ return end;
+}
+
+/*
+ * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
+ * and range operations only do a TLB lookup on the start address.
+ */
+static void aurora_pa_range(unsigned long start, unsigned long end,
+ unsigned long offset)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ writel(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
+ writel(end, l2x0_base + offset);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+ cache_sync();
+}
+
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+ /*
+ * round the start address down and the end address up to cache line boundaries
+ */
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+
+ /*
+ * Invalidate all full cache lines between 'start' and 'end'.
+ */
+ while (start < end) {
+ unsigned long range_end = calc_range_end(start, end);
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_INVAL_RANGE_REG);
+ start = range_end;
+ }
+}
+
+static void aurora_clean_range(unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 is forced to WT, the L2 will always be clean and we
+ * don't need to do anything here.
+ */
+ if (!l2_wt_override) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_CLEAN_RANGE_REG);
+ start = range_end;
+ }
+ }
+}
+
+static void aurora_flush_range(unsigned long start, unsigned long end)
+{
+ if (!l2_wt_override) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_FLUSH_RANGE_REG);
+ start = range_end;
+ }
+ }
+}
+
static void __init l2x0_of_setup(const struct device_node *np,
u32 *aux_val, u32 *aux_mask)
{
@@ -491,9 +614,15 @@ static void __init pl310_save(void)
}
}
+static void aurora_save(void)
+{
+ l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL);
+ l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
+
static void l2x0_resume(void)
{
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* restore aux ctrl and enable l2 */
l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
@@ -502,7 +631,7 @@ static void l2x0_resume(void)
l2x0_inv_all();
- writel_relaxed(1, l2x0_base + L2X0_CTRL);
+ writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
}
}
@@ -510,7 +639,7 @@ static void pl310_resume(void)
{
u32 l2x0_revision;
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* restore pl310 setup */
writel_relaxed(l2x0_saved_regs.tag_latency,
l2x0_base + L2X0_TAG_LATENCY_CTRL);
@@ -536,22 +665,108 @@ static void pl310_resume(void)
l2x0_resume();
}
+static void aurora_resume(void)
+{
+ if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ writel(l2x0_saved_regs.aux_ctrl, l2x0_base + L2X0_AUX_CTRL);
+ writel(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
+ }
+}
+
+static void __init aurora_broadcast_l2_commands(void)
+{
+ __u32 u;
+ /* Enable broadcasting of cache commands to L2 */
+ __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
+ u |= AURORA_CTRL_FW; /* Set the FW bit */
+ __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
+ isb();
+}
+
+static void __init aurora_of_setup(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+ u32 mask = AURORA_ACR_REPLACEMENT_MASK;
+
+ of_property_read_u32(np, "cache-id-part",
+ &cache_id_part_number_from_dt);
+
+ /* Determine and save the write policy */
+ l2_wt_override = of_property_read_bool(np, "wt-override");
+
+ if (l2_wt_override) {
+ val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+ mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+ }
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
static const struct l2x0_of_data pl310_data = {
- pl310_of_setup,
- pl310_save,
- pl310_resume,
+ .setup = pl310_of_setup,
+ .save = pl310_save,
+ .outer_cache = {
+ .resume = pl310_resume,
+ .inv_range = l2x0_inv_range,
+ .clean_range = l2x0_clean_range,
+ .flush_range = l2x0_flush_range,
+ .sync = l2x0_cache_sync,
+ .flush_all = l2x0_flush_all,
+ .inv_all = l2x0_inv_all,
+ .disable = l2x0_disable,
+ .set_debug = pl310_set_debug,
+ },
};
static const struct l2x0_of_data l2x0_data = {
- l2x0_of_setup,
- NULL,
- l2x0_resume,
+ .setup = l2x0_of_setup,
+ .save = NULL,
+ .outer_cache = {
+ .resume = l2x0_resume,
+ .inv_range = l2x0_inv_range,
+ .clean_range = l2x0_clean_range,
+ .flush_range = l2x0_flush_range,
+ .sync = l2x0_cache_sync,
+ .flush_all = l2x0_flush_all,
+ .inv_all = l2x0_inv_all,
+ .disable = l2x0_disable,
+ },
+};
+
+static const struct l2x0_of_data aurora_with_outer_data = {
+ .setup = aurora_of_setup,
+ .save = aurora_save,
+ .outer_cache = {
+ .resume = aurora_resume,
+ .inv_range = aurora_inv_range,
+ .clean_range = aurora_clean_range,
+ .flush_range = aurora_flush_range,
+ .sync = l2x0_cache_sync,
+ .flush_all = l2x0_flush_all,
+ .inv_all = l2x0_inv_all,
+ .disable = l2x0_disable,
+ },
+};
+
+static const struct l2x0_of_data aurora_no_outer_data = {
+ .setup = aurora_of_setup,
+ .save = aurora_save,
+ .outer_cache = {
+ .resume = aurora_resume,
+ },
};
static const struct of_device_id l2x0_ids[] __initconst = {
{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
+ { .compatible = "marvell,aurora-system-cache",
+ .data = (void *)&aurora_no_outer_data},
+ { .compatible = "marvell,aurora-outer-cache",
+ .data = (void *)&aurora_with_outer_data},
{}
};
@@ -577,17 +792,24 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
data = of_match_node(l2x0_ids, np)->data;
/* L2 configuration can only be changed if the cache is disabled */
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
if (data->setup)
data->setup(np, &aux_val, &aux_mask);
+
+ /* For the Aurora cache in no-outer mode, select the
+ * correct mode using the coprocessor */
+ if (data == &aurora_no_outer_data)
+ aurora_broadcast_l2_commands();
}
if (data->save)
data->save();
+ of_init = true;
l2x0_init(l2x0_base, aux_val, aux_mask);
- outer_cache.resume = data->resume;
+ memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache));
+
return 0;
}
#endif
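The Aurora maintenance ops above all share the same splitting pattern: each
hardware range operation covers at most MAX_RANGE_SIZE bytes and never
crosses a page boundary, because the controller only translates the start
address. A standalone sketch of that loop (plain C, hypothetical
addresses):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define MAX_RANGE_SIZE	1024UL
#define CACHE_LINE_SIZE	32UL

static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
	if (end > start + MAX_RANGE_SIZE)	/* bound the pipeline stall */
		end = start + MAX_RANGE_SIZE;
	if (end > PAGE_ALIGN(start + 1))	/* stay within one page */
		end = PAGE_ALIGN(start + 1);
	return end;
}

int main(void)
{
	unsigned long start = 0xffe0, end = 0x12000;	/* hypothetical */

	start &= ~(CACHE_LINE_SIZE - 1);
	while (start < end) {
		unsigned long range_end = calc_range_end(start, end);
		/* the hardware op takes an inclusive end address */
		printf("op: 0x%05lx..0x%05lx\n", start,
		       range_end - CACHE_LINE_SIZE);
		start = range_end;
	}
	return 0;
}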
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index cd956647c21..7539ec27506 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -44,8 +44,10 @@ ENDPROC(v7_flush_icache_all)
ENTRY(v7_flush_dcache_louis)
dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr
- ands r3, r0, #0xe00000 @ extract LoUIS from clidr
- mov r3, r3, lsr #20 @ r3 = LoUIS * 2
+ ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr
+ ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr
+ ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2
+ ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2
moveq pc, lr @ return if level == 0
mov r10, #0 @ r10 (starting level) = 0
b flush_levels @ start flushing cache levels
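For reference, the two CLIDR fields the new ALT_SMP/ALT_UP pair extracts:
LoUIS lives in bits [23:21] and LoUU in bits [29:27], and the shift leaves
(level * 2) in r3, the step size used when selecting cache levels via
CSSELR. A rough C sketch of the decode (hypothetical CLIDR value):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t clidr = 0x09200003;		/* hypothetical CLIDR */
	uint32_t louis = (clidr >> 21) & 7;	/* SMP: unification, inner shareable */
	uint32_t louu  = (clidr >> 27) & 7;	/* UP: unification, uniprocessor */

	printf("LoUIS=%u (r3=%u), LoUU=%u (r3=%u)\n",
	       louis, louis * 2, louu, louu * 2);
	return 0;
}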
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 4e07eec1270..bc4a5e9ebb7 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -2,6 +2,9 @@
* linux/arch/arm/mm/context.c
*
* Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,14 +17,40 @@
#include <linux/percpu.h>
#include <asm/mmu_context.h>
+#include <asm/smp_plat.h>
#include <asm/thread_notify.h>
#include <asm/tlbflush.h>
+/*
+ * On ARMv6, we have the following structure in the Context ID:
+ *
+ * 31 7 0
+ * +-------------------------+-----------+
+ * | process ID | ASID |
+ * +-------------------------+-----------+
+ * | context ID |
+ * +-------------------------------------+
+ *
+ * The ASID is used to tag entries in the CPU caches and TLBs.
+ * The context ID is used by debuggers and trace logic, and
+ * should be unique within all running processes.
+ */
+#define ASID_FIRST_VERSION (1ULL << ASID_BITS)
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1)
+
+#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1)
+#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK)
+
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
+
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
#ifdef CONFIG_ARM_LPAE
-void cpu_set_reserved_ttbr0(void)
+static void cpu_set_reserved_ttbr0(void)
{
unsigned long ttbl = __pa(swapper_pg_dir);
unsigned long ttbh = 0;
@@ -37,7 +66,7 @@ void cpu_set_reserved_ttbr0(void)
isb();
}
#else
-void cpu_set_reserved_ttbr0(void)
+static void cpu_set_reserved_ttbr0(void)
{
u32 ttb;
/* Copy TTBR1 into TTBR0 */
@@ -84,124 +113,104 @@ static int __init contextidr_notifier_init(void)
arch_initcall(contextidr_notifier_init);
#endif
-/*
- * We fork()ed a process, and we need a new context for the child
- * to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+static void flush_context(unsigned int cpu)
{
- mm->context.id = 0;
- raw_spin_lock_init(&mm->context.id_lock);
-}
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+ for_each_possible_cpu(i) {
+ if (i == cpu) {
+ asid = 0;
+ } else {
+ asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+ __set_bit(ASID_TO_IDX(asid), asid_map);
+ }
+ per_cpu(reserved_asids, i) = asid;
+ }
-static void flush_context(void)
-{
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- if (icache_is_vivt_asid_tagged()) {
+ /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ if (!tlb_ops_need_broadcast())
+ cpumask_set_cpu(cpu, &tlb_flush_pending);
+ else
+ cpumask_setall(&tlb_flush_pending);
+
+ if (icache_is_vivt_asid_tagged())
__flush_icache_all();
- dsb();
- }
}
-#ifdef CONFIG_SMP
+static int is_reserved_asid(u64 asid)
+{
+ int cpu;
+ for_each_possible_cpu(cpu)
+ if (per_cpu(reserved_asids, cpu) == asid)
+ return 1;
+ return 0;
+}
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
+static void new_context(struct mm_struct *mm, unsigned int cpu)
{
- unsigned long flags;
+ u64 asid = mm->context.id;
+ u64 generation = atomic64_read(&asid_generation);
- /*
- * Locking needed for multi-threaded applications where the
- * same mm->context.id could be set from different CPUs during
- * the broadcast. This function is also called via IPI so the
- * mm->context.id_lock has to be IRQ-safe.
- */
- raw_spin_lock_irqsave(&mm->context.id_lock, flags);
- if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) {
+ if (asid != 0 && is_reserved_asid(asid)) {
/*
- * Old version of ASID found. Set the new one and
- * reset mm_cpumask(mm).
+ * Our current ASID was active during a rollover, we can
+ * continue to use it and this was just a false alarm.
*/
- mm->context.id = asid;
+ asid = generation | (asid & ~ASID_MASK);
+ } else {
+ /*
+ * Allocate a free ASID. If we can't find one, take a
+ * note of the currently active ASIDs and mark the TLBs
+ * as requiring flushes.
+ */
+ asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+ if (asid == NUM_USER_ASIDS) {
+ generation = atomic64_add_return(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
+ asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+ }
+ __set_bit(asid, asid_map);
+ asid = generation | IDX_TO_ASID(asid);
cpumask_clear(mm_cpumask(mm));
}
- raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
- /*
- * Set the mm_cpumask(mm) bit for the current CPU.
- */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ mm->context.id = asid;
}
-/*
- * Reset the ASID on the current CPU. This function call is broadcast
- * from the CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
{
- unsigned int asid;
+ unsigned long flags;
unsigned int cpu = smp_processor_id();
- struct mm_struct *mm = current->active_mm;
- smp_rmb();
- asid = cpu_last_asid + cpu + 1;
+ if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
+ __check_vmalloc_seq(mm);
- flush_context();
- set_mm_context(mm, asid);
-
- /* set the new ASID */
- cpu_switch_mm(mm->pgd, mm);
-}
+ /*
+ * Required during context switch to avoid speculative page table
+ * walking with the wrong TTBR.
+ */
+ cpu_set_reserved_ttbr0();
-#else
+ if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+ && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id))
+ goto switch_mm_fastpath;
-static inline void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
- mm->context.id = asid;
- cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
-}
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+ new_context(mm, cpu);
-#endif
+ atomic64_set(&per_cpu(active_asids, cpu), mm->context.id);
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
-void __new_context(struct mm_struct *mm)
-{
- unsigned int asid;
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ local_flush_tlb_all();
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
- raw_spin_lock(&cpu_asid_lock);
-#ifdef CONFIG_SMP
- /*
- * Check the ASID again, in case the change was broadcast from
- * another CPU before we acquired the lock.
- */
- if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- raw_spin_unlock(&cpu_asid_lock);
- return;
- }
-#endif
- /*
- * At this point, it is guaranteed that the current mm (with
- * an old ASID) isn't active on any other CPU since the ASIDs
- * are changed simultaneously via IPI.
- */
- asid = ++cpu_last_asid;
- if (asid == 0)
- asid = cpu_last_asid = ASID_FIRST_VERSION;
-
- /*
- * If we've used up all our ASIDs, we need
- * to start a new version and flush the TLB.
- */
- if (unlikely((asid & ~ASID_MASK) == 0)) {
- asid = cpu_last_asid + smp_processor_id() + 1;
- flush_context();
-#ifdef CONFIG_SMP
- smp_wmb();
- smp_call_function(reset_context, NULL, 1);
-#endif
- cpu_last_asid += NR_CPUS;
- }
-
- set_mm_context(mm, asid);
- raw_spin_unlock(&cpu_asid_lock);
+switch_mm_fastpath:
+ cpu_switch_mm(mm->pgd, mm);
}
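The new allocator above replaces the IPI-broadcast rollover with a
generation counter plus ASID bitmap. A much-simplified, single-threaded
sketch of the core policy (locking, the per-CPU active_asids fast path,
reserved-ASID refill and TLB flushing are all omitted):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ASID_BITS		8
#define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1)

static uint64_t generation = ASID_FIRST_VERSION;
static unsigned char asid_map[NUM_USER_ASIDS];

static uint64_t new_context(uint64_t old)
{
	uint64_t idx;

	/* same generation: the old ASID is still valid, keep it */
	if (old && !((old ^ generation) >> ASID_BITS))
		return old;

	for (idx = 0; idx < NUM_USER_ASIDS; idx++)
		if (!asid_map[idx])
			break;

	if (idx == NUM_USER_ASIDS) {		/* rollover */
		generation += ASID_FIRST_VERSION;
		memset(asid_map, 0, sizeof(asid_map));
		idx = 0;
	}
	asid_map[idx] = 1;
	return generation | (idx + 1);		/* IDX_TO_ASID */
}

int main(void)
{
	uint64_t a = new_context(0), b = new_context(0);

	printf("A=%#llx B=%#llx A-again=%#llx\n",
	       (unsigned long long)a, (unsigned long long)b,
	       (unsigned long long)new_context(a));
	return 0;
}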
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 477a2d23ddf..6b2fb87c869 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -124,8 +124,6 @@ static void arm_dma_sync_single_for_device(struct device *dev,
__dma_page_cpu_to_dev(page, offset, size, dir);
}
-static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
-
struct dma_map_ops arm_dma_ops = {
.alloc = arm_dma_alloc,
.free = arm_dma_free,
@@ -610,7 +608,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
{
u64 mask = get_coherent_dma_mask(dev);
- struct page *page;
+ struct page *page = NULL;
void *addr;
#ifdef CONFIG_DMA_API_DEBUG
@@ -971,7 +969,7 @@ int dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_supported);
-static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
+int arm_dma_set_mask(struct device *dev, u64 dma_mask)
{
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
return -EIO;
@@ -1036,7 +1034,8 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
spin_unlock_irqrestore(&mapping->lock, flags);
}
-static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+ gfp_t gfp, struct dma_attrs *attrs)
{
struct page **pages;
int count = size >> PAGE_SHIFT;
@@ -1050,6 +1049,23 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
if (!pages)
return NULL;
+ if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+ unsigned long order = get_order(size);
+ struct page *page;
+
+ page = dma_alloc_from_contiguous(dev, count, order);
+ if (!page)
+ goto error;
+
+ __dma_clear_buffer(page, size);
+
+ for (i = 0; i < count; i++)
+ pages[i] = page + i;
+
+ return pages;
+ }
+
while (count) {
int j, order = __fls(count);
@@ -1083,14 +1099,21 @@ error:
return NULL;
}
-static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
+static int __iommu_free_buffer(struct device *dev, struct page **pages,
+ size_t size, struct dma_attrs *attrs)
{
int count = size >> PAGE_SHIFT;
int array_size = count * sizeof(struct page *);
int i;
- for (i = 0; i < count; i++)
- if (pages[i])
- __free_pages(pages[i], 0);
+
+ if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+ dma_release_from_contiguous(dev, pages[0], count);
+ } else {
+ for (i = 0; i < count; i++)
+ if (pages[i])
+ __free_pages(pages[i], 0);
+ }
+
if (array_size <= PAGE_SIZE)
kfree(pages);
else
@@ -1252,7 +1275,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
if (gfp & GFP_ATOMIC)
return __iommu_alloc_atomic(dev, size, handle);
- pages = __iommu_alloc_buffer(dev, size, gfp);
+ pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
if (!pages)
return NULL;
@@ -1273,7 +1296,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
err_mapping:
__iommu_remove_mapping(dev, *handle, size);
err_buffer:
- __iommu_free_buffer(dev, pages, size);
+ __iommu_free_buffer(dev, pages, size, attrs);
return NULL;
}
@@ -1329,7 +1352,7 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
}
__iommu_remove_mapping(dev, handle, size);
- __iommu_free_buffer(dev, pages, size);
+ __iommu_free_buffer(dev, pages, size, attrs);
}
static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
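From the caller's side, the new DMA_ATTR_FORCE_CONTIGUOUS path is selected
through the dma_attrs API of this kernel generation. A hypothetical driver
fragment (dev, size and error handling are illustrative only):

#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>

static void *alloc_contig_dma(struct device *dev, size_t size,
			      dma_addr_t *dma)
{
	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &attrs);
	/* with an IOMMU mapping, the buffer now comes from CMA in one chunk */
	return dma_alloc_attrs(dev, size, dma, GFP_KERNEL, &attrs);
}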
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index ab88ed4f8e0..99db769307e 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -92,6 +92,9 @@ static int __init init_static_idmap(void)
(long long)idmap_start, (long long)idmap_end);
identity_mapping_add(idmap_pgd, idmap_start, idmap_end);
+ /* Flush L1 for the hardware to see this page table content */
+ flush_cache_louis();
+
return 0;
}
early_initcall(init_static_idmap);
@@ -103,12 +106,15 @@ early_initcall(init_static_idmap);
*/
void setup_mm_for_reboot(void)
{
- /* Clean and invalidate L1. */
- flush_cache_all();
-
/* Switch to the identity mapping. */
cpu_switch_mm(idmap_pgd, &init_mm);
- /* Flush the TLB. */
+#ifdef CONFIG_CPU_HAS_ASID
+ /*
+ * We don't have a clean ASID for the identity mapping, which
+ * may clash with virtual addresses of the previous page tables
+ * and therefore potentially in the TLB.
+ */
local_flush_tlb_all();
+#endif
}
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 5dcc2fd46c4..88fd86cf3d9 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys,
}
EXPORT_SYMBOL(ioremap_page);
-void __check_kvm_seq(struct mm_struct *mm)
+void __check_vmalloc_seq(struct mm_struct *mm)
{
unsigned int seq;
do {
- seq = init_mm.context.kvm_seq;
+ seq = init_mm.context.vmalloc_seq;
memcpy(pgd_offset(mm, VMALLOC_START),
pgd_offset_k(VMALLOC_START),
sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
pgd_index(VMALLOC_START)));
- mm->context.kvm_seq = seq;
- } while (seq != init_mm.context.kvm_seq);
+ mm->context.vmalloc_seq = seq;
+ } while (seq != init_mm.context.vmalloc_seq);
}
#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
@@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
if (!pmd_none(pmd)) {
/*
* Clear the PMD from the page table, and
- * increment the kvm sequence so others
+ * increment the vmalloc sequence so others
* notice this change.
*
* Note: this is still racy on SMP machines.
*/
pmd_clear(pmdp);
- init_mm.context.kvm_seq++;
+ init_mm.context.vmalloc_seq++;
/*
* Free the page table, if there was one.
@@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
* Ensure that the active_mm is up to date - we want to
* catch any use-after-iounmap cases.
*/
- if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq)
- __check_kvm_seq(current->active_mm);
+ if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
+ __check_vmalloc_seq(current->active_mm);
flush_tlb_kernel_range(virt, end);
}
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index ce8cb1970d7..10062ceadd1 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -11,18 +11,6 @@
#include <linux/random.h>
#include <asm/cachetype.h>
-static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
- unsigned long pgoff)
-{
- unsigned long base = addr & ~(SHMLBA-1);
- unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1);
-
- if (base + off <= addr)
- return base + off;
-
- return base - off;
-}
-
#define COLOUR_ALIGN(addr,pgoff) \
((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \
(((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
@@ -69,9 +57,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long start_addr;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
+ struct vm_unmapped_area_info info;
/*
* We only need to do colour alignment if either the I or D
@@ -104,46 +92,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
(!vma || addr + len <= vma->vm_start))
return addr;
}
- if (len > mm->cached_hole_size) {
- start_addr = addr = mm->free_area_cache;
- } else {
- start_addr = addr = mm->mmap_base;
- mm->cached_hole_size = 0;
- }
-full_search:
- if (do_align)
- addr = COLOUR_ALIGN(addr, pgoff);
- else
- addr = PAGE_ALIGN(addr);
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (TASK_SIZE - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- if (do_align)
- addr = COLOUR_ALIGN(addr, pgoff);
- }
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ return vm_unmapped_area(&info);
}
unsigned long
@@ -156,6 +112,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long addr = addr0;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
+ struct vm_unmapped_area_info info;
/*
* We only need to do colour alignment if either the I or D
@@ -187,70 +144,27 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- /* check if free_area_cache is useful for us */
- if (len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = mm->mmap_base;
- }
-
- /* either no address requested or can't fit in requested address hole */
- addr = mm->free_area_cache;
- if (do_align) {
- unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff);
- addr = base + len;
- }
-
- /* make sure it can fit in the remaining address space */
- if (addr > len) {
- vma = find_vma(mm, addr-len);
- if (!vma || addr <= vma->vm_start)
- /* remember the address as a hint for next time */
- return (mm->free_area_cache = addr-len);
- }
-
- if (mm->mmap_base < len)
- goto bottomup;
-
- addr = mm->mmap_base - len;
- if (do_align)
- addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-
- do {
- /*
- * Lookup failure means no vma is above this address,
- * else if new region fits below vma->vm_start,
- * return with success:
- */
- vma = find_vma(mm, addr);
- if (!vma || addr+len <= vma->vm_start)
- /* remember the address as a hint for next time */
- return (mm->free_area_cache = addr);
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = mm->mmap_base;
+ info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ addr = vm_unmapped_area(&info);
- /* remember the largest hole we saw so far */
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start - len;
- if (do_align)
- addr = COLOUR_ALIGN_DOWN(addr, pgoff);
- } while (len < vma->vm_start);
-
-bottomup:
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
- mm->cached_hole_size = ~0UL;
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = mm->mmap_base;
- mm->cached_hole_size = ~0UL;
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
return addr;
}
@@ -279,7 +193,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* You really shouldn't be using read() or write() on /dev/mem. This
* might go away in the future.
*/
-int valid_phys_addr_range(unsigned long addr, size_t size)
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
if (addr < PHYS_OFFSET)
return 0;
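The vm_unmapped_area() conversion above encodes the VIPT colour constraint
in align_mask/align_offset: the returned address must be congruent to
(pgoff << PAGE_SHIFT) modulo SHMLBA. A sketch of that arithmetic (plain C,
illustrative values):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~0UL << PAGE_SHIFT)
#define SHMLBA		(4UL << PAGE_SHIFT)	/* 4 cache colours */

int main(void)
{
	unsigned long pgoff = 3;
	unsigned long align_mask = PAGE_MASK & (SHMLBA - 1);
	unsigned long align_offset = pgoff << PAGE_SHIFT;
	unsigned long addr = 0x40021000;	/* candidate from the gap search */

	/* round up until the masked bits match the wanted colour */
	addr += (align_offset - addr) & align_mask;
	printf("aligned addr = %#lx (colour %lu)\n",
	       addr, (addr & (SHMLBA - 1)) >> PAGE_SHIFT);
	return 0;
}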
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 941dfb9e9a7..9f0610243bd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -488,7 +488,7 @@ static void __init build_mem_type_table(void)
#endif
for (i = 0; i < 16; i++) {
- unsigned long v = pgprot_val(protection_map[i]);
+ pteval_t v = pgprot_val(protection_map[i]);
protection_map[i] = __pgprot(v | user_pgprot);
}
@@ -876,6 +876,22 @@ static void __init pci_reserve_io(void)
#define pci_reserve_io() do { } while (0)
#endif
+#ifdef CONFIG_DEBUG_LL
+void __init debug_ll_io_init(void)
+{
+ struct map_desc map;
+
+ debug_ll_addr(&map.pfn, &map.virtual);
+ if (!map.pfn || !map.virtual)
+ return;
+ map.pfn = __phys_to_pfn(map.pfn);
+ map.virtual &= PAGE_MASK;
+ map.length = PAGE_SIZE;
+ map.type = MT_DEVICE;
+ create_mapping(&map);
+}
+#endif
+
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index b29a2265af0..eb6aa73bc8b 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -167,6 +167,10 @@
tst r1, #L_PTE_YOUNG
tstne r1, #L_PTE_PRESENT
moveq r3, #0
+#ifndef CONFIG_CPU_USE_DOMAINS
+ tstne r1, #L_PTE_NONE
+ movne r3, #0
+#endif
str r3, [r0]
mcr p15, 0, r0, c7, c10, 1 @ flush_pte
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 86b8b480634..09c5233f4df 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -89,7 +89,7 @@ ENTRY(cpu_v6_dcache_clean_area)
mov pc, lr
/*
- * cpu_arm926_switch_mm(pgd_phys, tsk)
+ * cpu_v6_switch_mm(pgd_phys, tsk)
*
* Set the translation table base pointer to be pgd_phys
*
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index fd045e70639..6d98c13ab82 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -100,7 +100,11 @@ ENTRY(cpu_v7_set_pte_ext)
orrne r3, r3, #PTE_EXT_XN
tst r1, #L_PTE_YOUNG
- tstne r1, #L_PTE_PRESENT
+ tstne r1, #L_PTE_VALID
+#ifndef CONFIG_CPU_USE_DOMAINS
+ eorne r1, r1, #L_PTE_NONE
+ tstne r1, #L_PTE_NONE
+#endif
moveq r3, #0
ARM( str r3, [r0, #2048]! )
@@ -161,11 +165,11 @@ ENDPROC(cpu_v7_set_pte_ext)
* TFR EV X F I D LR S
* .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM
* rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
- * 1 0 110 0011 1100 .111 1101 < we want
+ * 01 0 110 0011 1100 .111 1101 < we want
*/
.align 2
.type v7_crval, #object
v7_crval:
- crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
+ crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
.previous
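__v7_setup consumes the crval pair by clearing the 'clear' bits of SCTLR
and ORing in 'mmuset'; the widened clear mask now also covers bit 29, which
appears to be SCTLR.AFE (the hardware access-flag enable), matching the
extra leading 0 in the "we want" row above. A sketch (hypothetical reset
SCTLR value):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sctlr  = 0x30c53878;	/* hypothetical reset value */
	uint32_t clear  = 0x2120c302;	/* bit 29 (AFE) now cleared too */
	uint32_t mmuset = 0x10c03c7d;

	sctlr = (sctlr & ~clear) | mmuset;
	printf("SCTLR = 0x%08x\n", sctlr);
	return 0;
}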
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 8de0f1dd154..7b56386f949 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -65,8 +65,11 @@ ENDPROC(cpu_v7_switch_mm)
*/
ENTRY(cpu_v7_set_pte_ext)
#ifdef CONFIG_MMU
- tst r2, #L_PTE_PRESENT
+ tst r2, #L_PTE_VALID
beq 1f
+ tst r3, #1 << (57 - 32) @ L_PTE_NONE
+ bicne r2, #L_PTE_VALID
+ bne 1f
tst r3, #1 << (55 - 32) @ L_PTE_DIRTY
orreq r2, #L_PTE_RDONLY
1: strd r2, r3, [r0]
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 846d279f317..350f6a74992 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -57,7 +57,7 @@ ENTRY(cpu_v7_reset)
THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions)
mcr p15, 0, r1, c1, c0, 0 @ disable MMU
isb
- mov pc, r0
+ bx r0
ENDPROC(cpu_v7_reset)
.popsection
@@ -169,6 +169,63 @@ __v7_ca15mp_setup:
orreq r0, r0, r10 @ Enable CPU-specific SMP bits
mcreq p15, 0, r0, c1, c0, 1
#endif
+
+__v7_pj4b_setup:
+#ifdef CONFIG_CPU_PJ4B
+
+/* Auxiliary Debug Modes Control 1 Register */
+#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
+#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
+#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */
+#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */
+
+/* Auxiliary Debug Modes Control 2 Register */
+#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */
+#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */
+#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */
+#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */
+#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */
+#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\
+ PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR)
+
+/* Auxiliary Functional Modes Control Register 0 */
+#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */
+#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */
+#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */
+
+/* Auxiliary Debug Modes Control 0 Register */
+#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */
+
+ /* Auxiliary Debug Modes Control 1 Register */
+ mrc p15, 1, r0, c15, c1, 1
+ orr r0, r0, #PJ4B_CLEAN_LINE
+ orr r0, r0, #PJ4B_BCK_OFF_STREX
+ orr r0, r0, #PJ4B_INTER_PARITY
+ bic r0, r0, #PJ4B_STATIC_BP
+ mcr p15, 1, r0, c15, c1, 1
+
+ /* Auxiliary Debug Modes Control 2 Register */
+ mrc p15, 1, r0, c15, c1, 2
+ bic r0, r0, #PJ4B_FAST_LDR
+ orr r0, r0, #PJ4B_AUX_DBG_CTRL2
+ mcr p15, 1, r0, c15, c1, 2
+
+ /* Auxiliary Functional Modes Control Register 0 */
+ mrc p15, 1, r0, c15, c2, 0
+#ifdef CONFIG_SMP
+ orr r0, r0, #PJ4B_SMP_CFB
+#endif
+ orr r0, r0, #PJ4B_L1_PAR_CHK
+ orr r0, r0, #PJ4B_BROADCAST_CACHE
+ mcr p15, 1, r0, c15, c2, 0
+
+ /* Auxiliary Debug Modes Control 0 Register */
+ mrc p15, 1, r0, c15, c1, 0
+ orr r0, r0, #PJ4B_WFI_WFE
+ mcr p15, 1, r0, c15, c1, 0
+
+#endif /* CONFIG_CPU_PJ4B */
+
__v7_setup:
adr r12, __v7_setup_stack @ the local stack
stmia r12, {r0-r5, r7, r9, r11, lr}
@@ -342,6 +399,16 @@ __v7_ca9mp_proc_info:
.long 0xff0ffff0
__v7_proc __v7_ca9mp_setup
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
+
+ /*
+ * Marvell PJ4B processor.
+ */
+ .type __v7_pj4b_proc_info, #object
+__v7_pj4b_proc_info:
+ .long 0x562f5840
+ .long 0xfffffff0
+ __v7_proc __v7_pj4b_setup
+ .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info
#endif /* CONFIG_ARM_LPAE */
/*
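The proc_info entry above is matched at boot by masking the MIDR and
comparing it against the stored value. A sketch of that check (hypothetical
MIDR):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t midr = 0x562f5842;	/* hypothetical PJ4B MIDR */
	uint32_t val  = 0x562f5840, mask = 0xfffffff0;

	printf("PJ4B match: %s\n", (midr & mask) == val ? "yes" : "no");
	return 0;
}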
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index bf312c354a2..0f5a5f2a2c7 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -17,7 +17,6 @@ struct arm_vmregion {
struct list_head vm_list;
unsigned long vm_start;
unsigned long vm_end;
- void *priv;
int vm_active;
const void *caller;
};