aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2015-01-18 20:50:33 +0000
committerMark Brown <broonie@kernel.org>2015-01-18 20:50:33 +0000
commit99687559e46be79e7b0b8f57570928e341b24a6d (patch)
tree81dfd6f7a0930769a1c801edc370da8ed5d48021
parent041be7a254157109dec33e38ff9fe5c6fea60299 (diff)
parent2eb736d6b425cb932b038fd555243b9b0e59c036 (diff)
Merge branch 'linux-linaro-lsk' into linux-linaro-lsk-rt
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/include/asm/cpu_ops.h5
-rw-r--r--arch/arm64/include/asm/cpuidle.h13
-rw-r--r--arch/arm64/include/asm/dma-mapping.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/processor.h4
-rw-r--r--arch/arm64/include/asm/psci.h6
-rw-r--r--arch/arm64/include/asm/stackprotector.h38
-rw-r--r--arch/arm64/include/asm/string.h15
-rw-r--r--arch/arm64/kernel/Makefile1
-rw-r--r--arch/arm64/kernel/arm64ksyms.c5
-rw-r--r--arch/arm64/kernel/cpuidle.c31
-rw-r--r--arch/arm64/kernel/head.S2
-rw-r--r--arch/arm64/kernel/process.c6
-rw-r--r--arch/arm64/kernel/psci.c296
-rw-r--r--arch/arm64/kernel/ptrace.c28
-rw-r--r--arch/arm64/kernel/smp.c22
-rw-r--r--arch/arm64/kernel/time.c1
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm64/lib/Makefile1
-rw-r--r--arch/arm64/lib/memcmp.S258
-rw-r--r--arch/arm64/lib/memcpy.S192
-rw-r--r--arch/arm64/lib/memmove.S190
-rw-r--r--arch/arm64/lib/memset.S207
-rw-r--r--arch/arm64/lib/strcmp.S234
-rw-r--r--arch/arm64/lib/strlen.S126
-rw-r--r--arch/arm64/lib/strncmp.S310
-rw-r--r--arch/arm64/lib/strnlen.S171
-rw-r--r--arch/arm64/mm/cache.S6
-rw-r--r--arch/arm64/mm/hugetlbpage.c4
-rw-r--r--arch/arm64/mm/proc.S2
-rw-r--r--arch/x86/kernel/reboot.c12
-rw-r--r--drivers/cpuidle/Kconfig20
-rw-r--r--drivers/cpuidle/Kconfig.arm649
-rw-r--r--drivers/cpuidle/Makefile2
-rw-r--r--drivers/cpuidle/cpuidle-arm64.c162
-rw-r--r--drivers/cpuidle/dt_idle_states.c213
-rw-r--r--drivers/cpuidle/dt_idle_states.h7
-rw-r--r--drivers/cpuidle/of_idle_states.c274
-rw-r--r--drivers/cpuidle/of_idle_states.h8
-rw-r--r--include/linux/of_device.h15
-rw-r--r--include/linux/reboot.h5
42 files changed, 2312 insertions, 597 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 279594b83781..d64c1a0e9dd0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_HAS_OPP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_HAS_OPP
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -33,6 +34,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_C_RECORDMCOUNT
+ select HAVE_CC_STACKPROTECTOR
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index 152413076503..47dfa31ad71a 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -28,6 +28,8 @@ struct device_node;
* enable-method property.
* @cpu_init: Reads any data necessary for a specific enable-method from the
* devicetree, for a given cpu node and proposed logical id.
+ * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from
+ * devicetree, for a given cpu node and proposed logical id.
* @cpu_prepare: Early one-time preparation step for a cpu. If there is a
* mechanism for doing so, tests whether it is possible to boot
* the given CPU.
@@ -39,6 +41,7 @@ struct device_node;
* from the cpu to be killed.
* @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
* cpu being killed.
+ * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
* @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
* to wrong parameters or error conditions. Called from the
* CPU being suspended. Must be called with IRQs disabled.
@@ -46,12 +49,14 @@ struct device_node;
struct cpu_operations {
const char *name;
int (*cpu_init)(struct device_node *, unsigned int);
+ int (*cpu_init_idle)(struct device_node *, unsigned int);
int (*cpu_prepare)(unsigned int);
int (*cpu_boot)(unsigned int);
void (*cpu_postboot)(void);
#ifdef CONFIG_HOTPLUG_CPU
int (*cpu_disable)(unsigned int cpu);
void (*cpu_die)(unsigned int cpu);
+ int (*cpu_kill)(unsigned int cpu);
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
int (*cpu_suspend)(unsigned long);
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
new file mode 100644
index 000000000000..b52a9932e2b1
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_CPUIDLE_H
+#define __ASM_CPUIDLE_H
+
+#ifdef CONFIG_CPU_IDLE
+extern int cpu_init_idle(unsigned int cpu);
+#else
+static inline int cpu_init_idle(unsigned int cpu)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 00a41aab4a37..fa6a0c5a8de3 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -23,8 +23,6 @@
#include <asm-generic/dma-coherent.h>
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
#define DMA_ERROR_CODE (~(dma_addr_t)0)
extern struct dma_map_ops *dma_ops;
extern struct dma_map_ops coherent_swiotlb_dma_ops;
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index fb4b26509276..7099e3b84778 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -257,7 +257,7 @@ static inline pmd_t pte_pmd(pte_t pte)
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK))
+#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 45b20cd6cbca..349448c0caeb 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -136,8 +136,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
-#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc
-#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp
+#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc)
+#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp)
/*
* Prefetching support
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index 9a4b663670ff..e5312ea0ec1a 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,10 +14,6 @@
#ifndef __ASM_PSCI_H
#define __ASM_PSCI_H
-struct cpuidle_driver;
-void psci_init(void);
-
-int __init psci_dt_register_idle_states(struct cpuidle_driver *,
- struct device_node *[]);
+int psci_init(void);
#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
new file mode 100644
index 000000000000..fe5e287dc56b
--- /dev/null
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called stack canary
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on ARM. This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef __ASM_STACKPROTECTOR_H
+#define __ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary;
+
+ /* Try to get a semi random initial value. */
+ get_random_bytes(&canary, sizeof(canary));
+ canary ^= LINUX_VERSION_CODE;
+
+ current->stack_canary = canary;
+ __stack_chk_guard = current->stack_canary;
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 3ee8b303d9a9..64d2d4884a9d 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -22,6 +22,18 @@ extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c);
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *, const char *);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *, const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_STRLEN
+extern __kernel_size_t strlen(const char *);
+
+#define __HAVE_ARCH_STRNLEN
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
+
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);
@@ -34,4 +46,7 @@ extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index ac389d32ccde..a8ad571c4758 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 7f0512feaa13..a85843ddbde8 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr);
/* string / mem functions */
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(memcmp);
/* atomic bitops */
EXPORT_SYMBOL(set_bit);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
new file mode 100644
index 000000000000..19d17f51db37
--- /dev/null
+++ b/arch/arm64/kernel/cpuidle.c
@@ -0,0 +1,31 @@
+/*
+ * ARM64 CPU idle arch support
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/cpuidle.h>
+#include <asm/cpu_ops.h>
+
+int cpu_init_idle(unsigned int cpu)
+{
+ int ret = -EOPNOTSUPP;
+ struct device_node *cpu_node = of_cpu_device_node_get(cpu);
+
+ if (!cpu_node)
+ return -ENODEV;
+
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+ ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu);
+
+ of_node_put(cpu_node);
+ return ret;
+}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index bbc9fe1658fa..50e0cc849b8e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -349,6 +349,8 @@ ENTRY(set_cpu_boot_mode_flag)
b.ne 1f
add x1, x1, #4
1: str w20, [x1] // This CPU has booted in EL1
+ dmb sy
+ dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 75c6b1e0d606..150134bf8738 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,12 @@
#include <asm/processor.h>
#include <asm/stacktrace.h>
+#ifdef CONFIG_CC_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
static void setup_restart(void)
{
/*
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 0e32ab453e5b..e58a67974f98 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -20,6 +20,10 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/slab.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
#include <asm/compiler.h>
#include <asm/cpu_ops.h>
@@ -27,6 +31,7 @@
#include <asm/psci.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
+#include <asm/system_misc.h>
#define PSCI_POWER_STATE_TYPE_STANDBY 0
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
@@ -43,17 +48,23 @@ struct psci_operations {
int (*cpu_off)(struct psci_power_state state);
int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
int (*migrate)(unsigned long cpuid);
+ int (*affinity_info)(unsigned long target_affinity,
+ unsigned long lowest_affinity_level);
+ int (*migrate_info_type)(void);
};
static struct psci_operations psci_ops;
static int (*invoke_psci_fn)(u64, u64, u64, u64);
+typedef int (*psci_initcall_t)(const struct device_node *);
enum psci_function {
PSCI_FN_CPU_SUSPEND,
PSCI_FN_CPU_ON,
PSCI_FN_CPU_OFF,
PSCI_FN_MIGRATE,
+ PSCI_FN_AFFINITY_INFO,
+ PSCI_FN_MIGRATE_INFO_TYPE,
PSCI_FN_MAX,
};
@@ -61,53 +72,41 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state);
static u32 psci_function_id[PSCI_FN_MAX];
-#define PSCI_RET_SUCCESS 0
-#define PSCI_RET_EOPNOTSUPP -1
-#define PSCI_RET_EINVAL -2
-#define PSCI_RET_EPERM -3
-
static int psci_to_linux_errno(int errno)
{
switch (errno) {
case PSCI_RET_SUCCESS:
return 0;
- case PSCI_RET_EOPNOTSUPP:
+ case PSCI_RET_NOT_SUPPORTED:
return -EOPNOTSUPP;
- case PSCI_RET_EINVAL:
+ case PSCI_RET_INVALID_PARAMS:
return -EINVAL;
- case PSCI_RET_EPERM:
+ case PSCI_RET_DENIED:
return -EPERM;
};
return -EINVAL;
}
-#define PSCI_POWER_STATE_ID_MASK 0xffff
-#define PSCI_POWER_STATE_ID_SHIFT 0
-#define PSCI_POWER_STATE_TYPE_MASK 0x1
-#define PSCI_POWER_STATE_TYPE_SHIFT 16
-#define PSCI_POWER_STATE_AFFL_MASK 0x3
-#define PSCI_POWER_STATE_AFFL_SHIFT 24
-
static u32 psci_power_state_pack(struct psci_power_state state)
{
- return ((state.id & PSCI_POWER_STATE_ID_MASK)
- << PSCI_POWER_STATE_ID_SHIFT) |
- ((state.type & PSCI_POWER_STATE_TYPE_MASK)
- << PSCI_POWER_STATE_TYPE_SHIFT) |
- ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK)
- << PSCI_POWER_STATE_AFFL_SHIFT);
+ return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+ & PSCI_0_2_POWER_STATE_ID_MASK) |
+ ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+ & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+ ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+ & PSCI_0_2_POWER_STATE_AFFL_MASK);
}
static void psci_power_state_unpack(u32 power_state,
struct psci_power_state *state)
{
- state->id = (power_state >> PSCI_POWER_STATE_ID_SHIFT)
- & PSCI_POWER_STATE_ID_MASK;
- state->type = (power_state >> PSCI_POWER_STATE_TYPE_SHIFT)
- & PSCI_POWER_STATE_TYPE_MASK;
- state->affinity_level = (power_state >> PSCI_POWER_STATE_AFFL_SHIFT)
- & PSCI_POWER_STATE_AFFL_MASK;
+ state->id = (power_state >> PSCI_0_2_POWER_STATE_ID_SHIFT)
+ & PSCI_0_2_POWER_STATE_ID_MASK;
+ state->type = (power_state >> PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+ & PSCI_0_2_POWER_STATE_TYPE_MASK;
+ state->affinity_level = (power_state >> PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+ & PSCI_0_2_POWER_STATE_AFFL_MASK;
}
/*
@@ -144,6 +143,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,
return function_id;
}
+static int psci_get_version(void)
+{
+ int err;
+
+ err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+ return err;
+}
+
static int psci_cpu_suspend(struct psci_power_state state,
unsigned long entry_point)
{
@@ -187,107 +194,135 @@ static int psci_migrate(unsigned long cpuid)
return psci_to_linux_errno(err);
}
-static const struct of_device_id psci_of_match[] __initconst = {
- { .compatible = "arm,psci", },
- {},
-};
+static int psci_affinity_info(unsigned long target_affinity,
+ unsigned long lowest_affinity_level)
+{
+ int err;
+ u32 fn;
-int __init psci_dt_register_idle_states(struct cpuidle_driver *drv,
- struct device_node *state_nodes[])
+ fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+ err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+ return err;
+}
+
+static int psci_migrate_info_type(void)
{
- int cpu, i;
- struct psci_power_state *psci_states;
- const struct cpu_operations *cpu_ops_ptr;
+ int err;
+ u32 fn;
+
+ fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+ err = invoke_psci_fn(fn, 0, 0, 0);
+ return err;
+}
+
+static int get_set_conduit_method(struct device_node *np)
+{
+ const char *method;
+
+ pr_info("probing for conduit method from DT.\n");
+
+ if (of_property_read_string(np, "method", &method)) {
+ pr_warn("missing \"method\" property\n");
+ return -ENXIO;
+ }
- if (!state_nodes)
+ if (!strcmp("hvc", method)) {
+ invoke_psci_fn = __invoke_psci_fn_hvc;
+ } else if (!strcmp("smc", method)) {
+ invoke_psci_fn = __invoke_psci_fn_smc;
+ } else {
+ pr_warn("invalid \"method\" property: %s\n", method);
return -EINVAL;
- /*
- * This is belt-and-braces: make sure that if the idle
- * specified protocol is psci, the cpu_ops have been
- * initialized to psci operations. Anything else is
- * a recipe for mayhem.
- */
- for_each_cpu(cpu, drv->cpumask) {
- cpu_ops_ptr = cpu_ops[cpu];
- if (WARN_ON(!cpu_ops_ptr || strcmp(cpu_ops_ptr->name, "psci")))
- return -EOPNOTSUPP;
}
+ return 0;
+}
- psci_states = kcalloc(drv->state_count, sizeof(*psci_states),
- GFP_KERNEL);
+static void psci_sys_reset(char str, const char *cmd)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
- if (!psci_states) {
- pr_warn("psci idle state allocation failed\n");
- return -ENOMEM;
- }
+static void psci_sys_poweroff(void)
+{
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+ int err, ver;
+
+ err = get_set_conduit_method(np);
- for_each_cpu(cpu, drv->cpumask) {
- if (per_cpu(psci_power_state, cpu)) {
- pr_warn("idle states already initialized on cpu %u\n",
- cpu);
- continue;
+ if (err)
+ goto out_put_node;
+
+ ver = psci_get_version();
+
+ if (ver == PSCI_RET_NOT_SUPPORTED) {
+ /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+ pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+ err = -EOPNOTSUPP;
+ goto out_put_node;
+ } else {
+ pr_info("PSCIv%d.%d detected in firmware.\n",
+ PSCI_VERSION_MAJOR(ver),
+ PSCI_VERSION_MINOR(ver));
+
+ if (PSCI_VERSION_MAJOR(ver) == 0 &&
+ PSCI_VERSION_MINOR(ver) < 2) {
+ err = -EINVAL;
+ pr_err("Conflicting PSCI version detected.\n");
+ goto out_put_node;
}
- per_cpu(psci_power_state, cpu) = psci_states;
}
+ pr_info("Using standard PSCI v0.2 function IDs\n");
+ psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
+ psci_ops.cpu_suspend = psci_cpu_suspend;
- for (i = 0; i < drv->state_count; i++) {
- u32 psci_power_state;
+ psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+ psci_ops.cpu_off = psci_cpu_off;
- if (!state_nodes[i]) {
- /*
- * An index with a missing node pointer falls back to
- * simple STANDBYWFI
- */
- psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY;
- continue;
- }
+ psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON;
+ psci_ops.cpu_on = psci_cpu_on;
- if (of_property_read_u32(state_nodes[i], "entry-method-param",
- &psci_power_state)) {
- pr_warn(" * %s missing entry-method-param property\n",
- state_nodes[i]->full_name);
- /*
- * If entry-method-param property is missing, fall
- * back to STANDBYWFI state
- */
- psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY;
- continue;
- }
+ psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE;
+ psci_ops.migrate = psci_migrate;
- pr_debug("psci-power-state %#x index %u\n",
- psci_power_state, i);
- psci_power_state_unpack(psci_power_state, &psci_states[i]);
- }
+ psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO;
+ psci_ops.affinity_info = psci_affinity_info;
- return 0;
+ psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+ PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+ psci_ops.migrate_info_type = psci_migrate_info_type;
+
+ arm_pm_restart = psci_sys_reset;
+
+ pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+ of_node_put(np);
+ return err;
}
-void __init psci_init(void)
+/*
+ * PSCI < v0.2 get PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
{
- struct device_node *np;
- const char *method;
u32 id;
+ int err;
- np = of_find_matching_node(NULL, psci_of_match);
- if (!np)
- return;
-
- pr_info("probing function IDs from device-tree\n");
+ err = get_set_conduit_method(np);
- if (of_property_read_string(np, "method", &method)) {
- pr_warning("missing \"method\" property\n");
+ if (err)
goto out_put_node;
- }
- if (!strcmp("hvc", method)) {
- invoke_psci_fn = __invoke_psci_fn_hvc;
- } else if (!strcmp("smc", method)) {
- invoke_psci_fn = __invoke_psci_fn_smc;
- } else {
- pr_warning("invalid \"method\" property: %s\n", method);
- goto out_put_node;
- }
+ pr_info("Using PSCI v0.1 Function IDs from DT\n");
if (!of_property_read_u32(np, "cpu_suspend", &id)) {
psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
@@ -311,7 +346,28 @@ void __init psci_init(void)
out_put_node:
of_node_put(np);
- return;
+ return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+ { .compatible = "arm,psci", .data = psci_0_1_init},
+ { .compatible = "arm,psci-0.2", .data = psci_0_2_init},
+ {},
+};
+
+int __init psci_init(void)
+{
+ struct device_node *np;
+ const struct of_device_id *matched_np;
+ psci_initcall_t init_fn;
+
+ np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+
+ if (!np)
+ return -ENODEV;
+
+ init_fn = (psci_initcall_t)matched_np->data;
+ return init_fn(np);
}
#ifdef CONFIG_SMP
@@ -364,6 +420,35 @@ static void cpu_psci_cpu_die(unsigned int cpu)
pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
}
+
+static int cpu_psci_cpu_kill(unsigned int cpu)
+{
+ int err, i;
+
+ if (!psci_ops.affinity_info)
+ return 1;
+ /*
+ * cpu_kill could race with cpu_die and we can
+ * potentially end up declaring this cpu undead
+ * while it is dying. So, try again a few times.
+ */
+
+ for (i = 0; i < 10; i++) {
+ err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+ if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+ pr_info("CPU%d killed.\n", cpu);
+ return 1;
+ }
+
+ msleep(10);
+ pr_info("Retrying again to check for CPU kill\n");
+ }
+
+ pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+ cpu, err);
+ /* Make op_cpu_kill() fail. */
+ return 0;
+}
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
@@ -386,6 +471,7 @@ const struct cpu_operations cpu_psci_ops = {
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cpu_psci_cpu_disable,
.cpu_die = cpu_psci_cpu_die,
+ .cpu_kill = cpu_psci_cpu_kill,
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
.cpu_suspend = cpu_psci_cpu_suspend,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index bc09a147454f..8ba6b0fa1753 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -655,11 +655,16 @@ static int compat_gpr_get(struct task_struct *target,
reg = task_pt_regs(target)->regs[idx];
}
- ret = copy_to_user(ubuf, &reg, sizeof(reg));
- if (ret)
- break;
-
- ubuf += sizeof(reg);
+ if (kbuf) {
+ memcpy(kbuf, &reg, sizeof(reg));
+ kbuf += sizeof(reg);
+ } else {
+ ret = copy_to_user(ubuf, &reg, sizeof(reg));
+ if (ret)
+ break;
+
+ ubuf += sizeof(reg);
+ }
}
return ret;
@@ -689,11 +694,16 @@ static int compat_gpr_set(struct task_struct *target,
unsigned int idx = start + i;
compat_ulong_t reg;
- ret = copy_from_user(&reg, ubuf, sizeof(reg));
- if (ret)
- return ret;
+ if (kbuf) {
+ memcpy(&reg, kbuf, sizeof(reg));
+ kbuf += sizeof(reg);
+ } else {
+ ret = copy_from_user(&reg, ubuf, sizeof(reg));
+ if (ret)
+ return ret;
- ubuf += sizeof(reg);
+ ubuf += sizeof(reg);
+ }
switch (idx) {
case 15:
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 7c868a2ac38b..0ac31a581f02 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -231,6 +231,19 @@ int __cpu_disable(void)
return 0;
}
+static int op_cpu_kill(unsigned int cpu)
+{
+ /*
+ * If we have no means of synchronising with the dying CPU, then assume
+ * that it is really dead. We can only wait for an arbitrary length of
+ * time and hope that it's dead, so let's skip the wait and just hope.
+ */
+ if (!cpu_ops[cpu]->cpu_kill)
+ return 1;
+
+ return cpu_ops[cpu]->cpu_kill(cpu);
+}
+
static DECLARE_COMPLETION(cpu_died);
/*
@@ -244,6 +257,15 @@ void __cpu_die(unsigned int cpu)
return;
}
pr_notice("CPU%u: shutdown\n", cpu);
+
+ /*
+ * Now that the dying CPU is beyond the point of no return w.r.t.
+ * in-kernel synchronisation, try to get the firwmare to help us to
+ * verify that it has really left the kernel before we consider
+ * clobbering anything it might still be using.
+ */
+ if (!op_cpu_kill(cpu))
+ pr_warn("CPU%d may not have shut down cleanly\n", cpu);
}
/*
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 03dc3718eb13..e1c7a540361b 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -18,6 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/clockchips.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 55d0e035205f..de93a4bbdb7c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -13,7 +13,7 @@
#define ARM_EXIT_DISCARD(x) x
OUTPUT_ARCH(aarch64)
-ENTRY(stext)
+ENTRY(_text)
jiffies = jiffies_64;
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 328ce1a99daa..d98d3e39879e 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,4 +1,5 @@
lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_in_user.o copy_page.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
+ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
new file mode 100644
index 000000000000..6ea0776ba6de
--- /dev/null
+++ b/arch/arm64/lib/memcmp.S
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+* compare memory areas(when two memory areas' offset are different,
+* alignment handled by the hardware)
+*
+* Parameters:
+* x0 - const memory area 1 pointer
+* x1 - const memory area 2 pointer
+* x2 - the maximal compare byte length
+* Returns:
+* x0 - a compare result, maybe less than, equal to, or greater than ZERO
+*/
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+limit .req x2
+result .req x0
+
+/* Internal variables. */
+data1 .req x3
+data1w .req w3
+data2 .req x4
+data2w .req w4
+has_nul .req x5
+diff .req x6
+endloop .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+pos .req x11
+limit_wd .req x12
+mask .req x13
+
+ENTRY(memcmp)
+ cbz limit, .Lret0
+ eor tmp1, src1, src2
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+ /*
+ * The input source addresses are at alignment boundary.
+ * Directly compare eight bytes each time.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, cs /* Last Dword or differences. */
+ cbz endloop, .Lloop_aligned
+
+ /* Not reached the limit, must have found a diff. */
+ tbz limit_wd, #63, .Lnot_limit
+
+ /* Limit % 8 == 0 => the diff is in the last 8 bytes. */
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+ /*
+ * The remained bytes less than 8. It is needed to extract valid data
+ * from last eight bytes of the intended memory range.
+ */
+ lsl limit, limit, #3 /* bytes-> bits. */
+ mov mask, #~0
+CPU_BE( lsr mask, mask, limit )
+CPU_LE( lsl mask, mask, limit )
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+ b .Lnot_limit
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that precede the start point.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ /*
+ * We can not add limit with alignment offset(tmp1) here. Since the
+ * addition probably make the limit overflown.
+ */
+ sub limit_wd, limit, #1/*limit != 0, so no underflow.*/
+ and tmp3, limit_wd, #7
+ lsr limit_wd, limit_wd, #3
+ add tmp3, tmp3, tmp1
+ add limit_wd, limit_wd, tmp3, lsr #3
+ add limit, limit, tmp1/* Adjust the limit for the extra. */
+
+ lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
+ neg tmp1, tmp1/* Bits to alignment -64. */
+ mov tmp2, #~0
+ /*mask off the non-intended bytes before the start address.*/
+CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 )
+
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ b .Lstart_realigned
+
+ /*src1 and src2 have different alignment offset.*/
+.Lmisaligned8:
+ cmp limit, #8
+ b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/
+
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum.*/
+
+ sub limit, limit, pos
+ /*compare the proceeding bytes in the first 8 byte segment.*/
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*diff occurred before the last byte.*/
+ cmp data1w, data2w
+ b.eq .Lstart_align
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ /*process more leading bytes to make src1 aligned...*/
+ add src1, src1, tmp3 /*backwards src1 to alignment boundary*/
+ add src2, src2, tmp3
+ sub limit, limit, tmp3
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+ /*load 8 bytes from aligned SRC1..*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /*Non-zero if differences found.*/
+ csinv endloop, diff, xzr, ne
+ cbnz endloop, .Lunequal_proc
+ /*How far is the current SRC2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+
+.Lrecal_offset:/*src1 is aligned now..*/
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes and compare from
+ * the SRC2 alignment boundary. If all 8 bytes are equal,then start
+ * the second part's comparison. Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ cbnz diff, .Lnot_limit
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ subs limit_wd, limit_wd, #1
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ cbz endloop, .Lloopcmp_proc
+.Lunequal_proc:
+ cbz diff, .Lremain8
+
+/*There is differnence occured in the latest comparison.*/
+.Lnot_limit:
+/*
+* For little endian,reverse the low significant equal bits into MSB,then
+* following CLZ can find how many equal bits exist.
+*/
+CPU_LE( rev diff, diff )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+
+ /*
+ * The MS-non-zero bit of DIFF marks either the first bit
+ * that is different, or the end of the significant data.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * We need to zero-extend (char is unsigned) the value and then
+ * perform a signed subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+
+.Lremain8:
+ /* Limit % 8 == 0 =>. all data are equal.*/
+ ands limit, limit, #7
+ b.eq .Lret0
+
+.Ltiny8proc:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+
+ ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
+ b.eq .Ltiny8proc
+ sub result, data1, data2
+ ret
+.Lret0:
+ mov result, #0
+ ret
+ENDPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 27b5003609b6..8a9a96d3ddae 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
@@ -27,27 +36,166 @@
* Returns:
* x0 - dest
*/
+dstin .req x0
+src .req x1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+tmp3 .req x5
+tmp3w .req w5
+dst .req x6
+
+A_l .req x7
+A_h .req x8
+B_l .req x9
+B_h .req x10
+C_l .req x11
+C_h .req x12
+D_l .req x13
+D_h .req x14
+
ENTRY(memcpy)
- mov x4, x0
- subs x2, x2, #8
- b.mi 2f
-1: ldr x3, [x1], #8
- subs x2, x2, #8
- str x3, [x4], #8
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- ldr w3, [x1], #4
- sub x2, x2, #4
- str w3, [x4], #4
-3: adds x2, x2, #2
- b.mi 4f
- ldrh w3, [x1], #2
- sub x2, x2, #2
- strh w3, [x4], #2
-4: adds x2, x2, #1
- b.mi 5f
- ldrb w3, [x1]
- strb w3, [x4]
-5: ret
+ mov dst, dstin
+ cmp count, #16
+ /*When memory length is less than 16, the accessed are not aligned.*/
+ b.lo .Ltiny15
+
+ neg tmp2, src
+ ands tmp2, tmp2, #15/* Bytes to reach alignment. */
+ b.eq .LSrcAligned
+ sub count, count, tmp2
+ /*
+ * Copy the leading memory data from src to dst in an increasing
+ * address order.By this way,the risk of overwritting the source
+ * memory data is eliminated when the distance between src and
+ * dst is less than 16. The memory accesses here are alignment.
+ */
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src], #1
+ strb tmp1w, [dst], #1
+1:
+ tbz tmp2, #1, 2f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+2:
+ tbz tmp2, #2, 3f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+3:
+ tbz tmp2, #3, .LSrcAligned
+ ldr tmp1, [src],#8
+ str tmp1, [dst],#8
+
+.LSrcAligned:
+ cmp count, #64
+ b.ge .Lcpy_over64
+ /*
+ * Deal with small copies quickly by dropping straight into the
+ * exit block.
+ */
+.Ltail63:
+ /*
+ * Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate.
+ */
+ ands tmp1, count, #0x30
+ b.eq .Ltiny15
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+1:
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+2:
+ ldp A_l, A_h, [src], #16
+ stp A_l, A_h, [dst], #16
+.Ltiny15:
+ /*
+ * Prefer to break one ldp/stp into several load/store to access
+ * memory in an increasing address order,rather than to load/store 16
+ * bytes from (src-16) to (dst-16) and to backward the src to aligned
+ * address,which way is used in original cortex memcpy. If keeping
+ * the original memcpy process here, memmove need to satisfy the
+ * precondition that src address is at least 16 bytes bigger than dst
+ * address,otherwise some source data will be overwritten when memove
+ * call memcpy directly. To make memmove simpler and decouple the
+ * memcpy's dependency on memmove, withdrew the original process.
+ */
+ tbz count, #3, 1f
+ ldr tmp1, [src], #8
+ str tmp1, [dst], #8
+1:
+ tbz count, #2, 2f
+ ldr tmp1w, [src], #4
+ str tmp1w, [dst], #4
+2:
+ tbz count, #1, 3f
+ ldrh tmp1w, [src], #2
+ strh tmp1w, [dst], #2
+3:
+ tbz count, #0, .Lexitfunc
+ ldrb tmp1w, [src]
+ strb tmp1w, [dst]
+
+.Lexitfunc:
+ ret
+
+.Lcpy_over64:
+ subs count, count, #128
+ b.ge .Lcpy_body_large
+ /*
+ * Less than 128 bytes to copy, so handle 64 here and then jump
+ * to the tail.
+ */
+ ldp A_l, A_h, [src],#16
+ stp A_l, A_h, [dst],#16
+ ldp B_l, B_h, [src],#16
+ ldp C_l, C_h, [src],#16
+ stp B_l, B_h, [dst],#16
+ stp C_l, C_h, [dst],#16
+ ldp D_l, D_h, [src],#16
+ stp D_l, D_h, [dst],#16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lcpy_body_large:
+ /* pre-get 64 bytes data. */
+ ldp A_l, A_h, [src],#16
+ ldp B_l, B_h, [src],#16
+ ldp C_l, C_h, [src],#16
+ ldp D_l, D_h, [src],#16
+1:
+ /*
+ * interlace the load of next 64 bytes data block with store of the last
+ * loaded 64 bytes data.
+ */
+ stp A_l, A_h, [dst],#16
+ ldp A_l, A_h, [src],#16
+ stp B_l, B_h, [dst],#16
+ ldp B_l, B_h, [src],#16
+ stp C_l, C_h, [dst],#16
+ ldp C_l, C_h, [src],#16
+ stp D_l, D_h, [dst],#16
+ ldp D_l, D_h, [src],#16
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst],#16
+ stp B_l, B_h, [dst],#16
+ stp C_l, C_h, [dst],#16
+ stp D_l, D_h, [dst],#16
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
ENDPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index b79fdfa42d39..57b19ea2dad4 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Move a buffer from src to test (alignment handled by the hardware).
@@ -28,30 +37,161 @@
* Returns:
* x0 - dest
*/
+dstin .req x0
+src .req x1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+tmp3 .req x5
+tmp3w .req w5
+dst .req x6
+
+A_l .req x7
+A_h .req x8
+B_l .req x9
+B_h .req x10
+C_l .req x11
+C_h .req x12
+D_l .req x13
+D_h .req x14
+
ENTRY(memmove)
- cmp x0, x1
- b.ls memcpy
- add x4, x0, x2
- add x1, x1, x2
- subs x2, x2, #8
- b.mi 2f
-1: ldr x3, [x1, #-8]!
- subs x2, x2, #8
- str x3, [x4, #-8]!
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- ldr w3, [x1, #-4]!
- sub x2, x2, #4
- str w3, [x4, #-4]!
-3: adds x2, x2, #2
- b.mi 4f
- ldrh w3, [x1, #-2]!
- sub x2, x2, #2
- strh w3, [x4, #-2]!
-4: adds x2, x2, #1
- b.mi 5f
- ldrb w3, [x1, #-1]
- strb w3, [x4, #-1]
-5: ret
+ cmp dstin, src
+ b.lo memcpy
+ add tmp1, src, count
+ cmp dstin, tmp1
+ b.hs memcpy /* No overlap. */
+
+ add dst, dstin, count
+ add src, src, count
+ cmp count, #16
+ b.lo .Ltail15 /*probably non-alignment accesses.*/
+
+ ands tmp2, src, #15 /* Bytes to reach alignment. */
+ b.eq .LSrcAligned
+ sub count, count, tmp2
+ /*
+ * process the aligned offset length to make the src aligned firstly.
+ * those extra instructions' cost is acceptable. It also make the
+ * coming accesses are based on aligned address.
+ */
+ tbz tmp2, #0, 1f
+ ldrb tmp1w, [src, #-1]!
+ strb tmp1w, [dst, #-1]!
+1:
+ tbz tmp2, #1, 2f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+2:
+ tbz tmp2, #2, 3f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+3:
+ tbz tmp2, #3, .LSrcAligned
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+
+.LSrcAligned:
+ cmp count, #64
+ b.ge .Lcpy_over64
+
+ /*
+ * Deal with small copies quickly by dropping straight into the
+ * exit block.
+ */
+.Ltail63:
+ /*
+ * Copy up to 48 bytes of data. At this point we only need the
+ * bottom 6 bits of count to be accurate.
+ */
+ ands tmp1, count, #0x30
+ b.eq .Ltail15
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+1:
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+2:
+ ldp A_l, A_h, [src, #-16]!
+ stp A_l, A_h, [dst, #-16]!
+
+.Ltail15:
+ tbz count, #3, 1f
+ ldr tmp1, [src, #-8]!
+ str tmp1, [dst, #-8]!
+1:
+ tbz count, #2, 2f
+ ldr tmp1w, [src, #-4]!
+ str tmp1w, [dst, #-4]!
+2:
+ tbz count, #1, 3f
+ ldrh tmp1w, [src, #-2]!
+ strh tmp1w, [dst, #-2]!
+3:
+ tbz count, #0, .Lexitfunc
+ ldrb tmp1w, [src, #-1]
+ strb tmp1w, [dst, #-1]
+
+.Lexitfunc:
+ ret
+
+.Lcpy_over64:
+ subs count, count, #128
+ b.ge .Lcpy_body_large
+ /*
+ * Less than 128 bytes to copy, so handle 64 bytes here and then jump
+ * to the tail.
+ */
+ ldp A_l, A_h, [src, #-16]
+ stp A_l, A_h, [dst, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp D_l, D_h, [src, #-64]!
+ stp D_l, D_h, [dst, #-64]!
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lcpy_body_large:
+ /* pre-load 64 bytes data. */
+ ldp A_l, A_h, [src, #-16]
+ ldp B_l, B_h, [src, #-32]
+ ldp C_l, C_h, [src, #-48]
+ ldp D_l, D_h, [src, #-64]!
+1:
+ /*
+ * interlace the load of next 64 bytes data block with store of the last
+ * loaded 64 bytes data.
+ */
+ stp A_l, A_h, [dst, #-16]
+ ldp A_l, A_h, [src, #-16]
+ stp B_l, B_h, [dst, #-32]
+ ldp B_l, B_h, [src, #-32]
+ stp C_l, C_h, [dst, #-48]
+ ldp C_l, C_h, [src, #-48]
+ stp D_l, D_h, [dst, #-64]!
+ ldp D_l, D_h, [src, #-64]!
+ subs count, count, #64
+ b.ge 1b
+ stp A_l, A_h, [dst, #-16]
+ stp B_l, B_h, [dst, #-32]
+ stp C_l, C_h, [dst, #-48]
+ stp D_l, D_h, [dst, #-64]!
+
+ tst count, #0x3f
+ b.ne .Ltail63
+ ret
ENDPROC(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 87e4a68fbbbc..7c72dfd36b63 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -1,5 +1,13 @@
/*
* Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -16,6 +24,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Fill in the buffer with character c (alignment handled by the hardware)
@@ -27,27 +36,181 @@
* Returns:
* x0 - buf
*/
+
+dstin .req x0
+val .req w1
+count .req x2
+tmp1 .req x3
+tmp1w .req w3
+tmp2 .req x4
+tmp2w .req w4
+zva_len_x .req x5
+zva_len .req w5
+zva_bits_x .req x6
+
+A_l .req x7
+A_lw .req w7
+dst .req x8
+tmp3w .req w9
+tmp3 .req x9
+
ENTRY(memset)
- mov x4, x0
- and w1, w1, #0xff
- orr w1, w1, w1, lsl #8
- orr w1, w1, w1, lsl #16
- orr x1, x1, x1, lsl #32
- subs x2, x2, #8
- b.mi 2f
-1: str x1, [x4], #8
- subs x2, x2, #8
- b.pl 1b
-2: adds x2, x2, #4
- b.mi 3f
- sub x2, x2, #4
- str w1, [x4], #4
-3: adds x2, x2, #2
- b.mi 4f
- sub x2, x2, #2
- strh w1, [x4], #2
-4: adds x2, x2, #1
- b.mi 5f
- strb w1, [x4]
-5: ret
+ mov dst, dstin /* Preserve return value. */
+ and A_lw, val, #255
+ orr A_lw, A_lw, A_lw, lsl #8
+ orr A_lw, A_lw, A_lw, lsl #16
+ orr A_l, A_l, A_l, lsl #32
+
+ cmp count, #15
+ b.hi .Lover16_proc
+ /*All store maybe are non-aligned..*/
+ tbz count, #3, 1f
+ str A_l, [dst], #8
+1:
+ tbz count, #2, 2f
+ str A_lw, [dst], #4
+2:
+ tbz count, #1, 3f
+ strh A_lw, [dst], #2
+3:
+ tbz count, #0, 4f
+ strb A_lw, [dst]
+4:
+ ret
+
+.Lover16_proc:
+ /*Whether the start address is aligned with 16.*/
+ neg tmp2, dst
+ ands tmp2, tmp2, #15
+ b.eq .Laligned
+/*
+* The count is not less than 16, we can use stp to store the start 16 bytes,
+* then adjust the dst aligned with 16.This process will make the current
+* memory address at alignment boundary.
+*/
+ stp A_l, A_l, [dst] /*non-aligned store..*/
+ /*make the dst aligned..*/
+ sub count, count, tmp2
+ add dst, dst, tmp2
+
+.Laligned:
+ cbz A_l, .Lzero_mem
+
+.Ltail_maybe_long:
+ cmp count, #64
+ b.ge .Lnot_short
+.Ltail63:
+ ands tmp1, count, #0x30
+ b.eq 3f
+ cmp tmp1w, #0x20
+ b.eq 1f
+ b.lt 2f
+ stp A_l, A_l, [dst], #16
+1:
+ stp A_l, A_l, [dst], #16
+2:
+ stp A_l, A_l, [dst], #16
+/*
+* The last store length is less than 16,use stp to write last 16 bytes.
+* It will lead some bytes written twice and the access is non-aligned.
+*/
+3:
+ ands count, count, #15
+ cbz count, 4f
+ add dst, dst, count
+ stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
+4:
+ ret
+
+ /*
+ * Critical loop. Start at a new cache line boundary. Assuming
+ * 64 bytes per line, this ensures the entire loop is in one line.
+ */
+ .p2align L1_CACHE_SHIFT
+.Lnot_short:
+ sub dst, dst, #16/* Pre-bias. */
+ sub count, count, #64
+1:
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ stp A_l, A_l, [dst, #48]
+ stp A_l, A_l, [dst, #64]!
+ subs count, count, #64
+ b.ge 1b
+ tst count, #0x3f
+ add dst, dst, #16
+ b.ne .Ltail63
+.Lexitfunc:
+ ret
+
+ /*
+ * For zeroing memory, check to see if we can use the ZVA feature to
+ * zero entire 'cache' lines.
+ */
+.Lzero_mem:
+ cmp count, #63
+ b.le .Ltail63
+ /*
+ * For zeroing small amounts of memory, it's not worth setting up
+ * the line-clear code.
+ */
+ cmp count, #128
+ b.lt .Lnot_short /*count is at least 128 bytes*/
+
+ mrs tmp1, dczid_el0
+ tbnz tmp1, #4, .Lnot_short
+ mov tmp3w, #4
+ and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
+ lsl zva_len, tmp3w, zva_len
+
+ ands tmp3w, zva_len, #63
+ /*
+ * ensure the zva_len is not less than 64.
+ * It is not meaningful to use ZVA if the block size is less than 64.
+ */
+ b.ne .Lnot_short
+.Lzero_by_line:
+ /*
+ * Compute how far we need to go to become suitably aligned. We're
+ * already at quad-word alignment.
+ */
+ cmp count, zva_len_x
+ b.lt .Lnot_short /* Not enough to reach alignment. */
+ sub zva_bits_x, zva_len_x, #1
+ neg tmp2, dst
+ ands tmp2, tmp2, zva_bits_x
+ b.eq 2f /* Already aligned. */
+ /* Not aligned, check that there's enough to copy after alignment.*/
+ sub tmp1, count, tmp2
+ /*
+ * grantee the remain length to be ZVA is bigger than 64,
+ * avoid to make the 2f's process over mem range.*/
+ cmp tmp1, #64
+ ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
+ b.lt .Lnot_short
+ /*
+ * We know that there's at least 64 bytes to zero and that it's safe
+ * to overrun by 64 bytes.
+ */
+ mov count, tmp1
+1:
+ stp A_l, A_l, [dst]
+ stp A_l, A_l, [dst, #16]
+ stp A_l, A_l, [dst, #32]
+ subs tmp2, tmp2, #64
+ stp A_l, A_l, [dst, #48]
+ add dst, dst, #64
+ b.ge 1b
+ /* We've overrun a bit, so adjust dst downwards.*/
+ add dst, dst, tmp2
+2:
+ sub count, count, zva_len_x
+3:
+ dc zva, dst
+ add dst, dst, zva_len_x
+ subs count, count, zva_len_x
+ b.ge 3b
+ ands count, count, zva_bits_x
+ b.ne .Ltail_maybe_long
+ ret
ENDPROC(memset)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
new file mode 100644
index 000000000000..42f828b06c59
--- /dev/null
+++ b/arch/arm64/lib/strcmp.S
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ * x0 - const string 1 pointer
+ * x1 - const string 2 pointer
+ * Returns:
+ * x0 - an integer less than, equal to, or greater than zero
+ * if s1 is found, respectively, to be less than, to match,
+ * or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+result .req x0
+
+/* Internal variables. */
+data1 .req x2
+data1w .req w2
+data2 .req x3
+data2w .req w3
+has_nul .req x4
+diff .req x5
+syndrome .req x6
+tmp1 .req x7
+tmp2 .req x8
+tmp3 .req x9
+zeroones .req x10
+pos .req x11
+
+ENTRY(strcmp)
+ eor tmp1, src1, src2
+ mov zeroones, #REP8_01
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lloop_aligned
+ b .Lcal_cmpresult
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that preceed the start point.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 /* Bits to alignment -64. */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ b .Lstart_realigned
+
+.Lmisaligned8:
+ /*
+ * Get the align offset length to compare per byte first.
+ * After this process, one string's address will be aligned.
+ */
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum. */
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*find the null or unequal...*/
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs
+ b.eq .Lstart_align /*the last bytes are equal....*/
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ /*process more leading bytes to make str1 aligned...*/
+ add src1, src1, tmp3
+ add src2, src2, tmp3
+ /*load 8 bytes from aligned str1 and non-aligned str2..*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbnz syndrome, .Lcal_cmpresult
+ /*How far is the current str2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+.Lrecal_offset:
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes from the SRC2 alignment
+ * boundary,then compare with the relative bytes from SRC1.
+ * If all 8 bytes are equal,then start the second part's comparison.
+ * Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbnz syndrome, .Lcal_cmpresult
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lloopcmp_proc
+
+.Lcal_cmpresult:
+ /*
+ * reversed the byte-order as big-endian,then CLZ can find the most
+ * significant zero bits.
+ */
+CPU_LE( rev syndrome, syndrome )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+
+ /*
+ * For big-endian we cannot use the trick with the syndrome value
+ * as carry-propagation can corrupt the upper bits if the trailing
+ * bytes in the string contain 0x01.
+ * However, if there is no NUL byte in the dword, we can generate
+ * the result directly. We ca not just subtract the bytes as the
+ * MSB might be significant.
+ */
+CPU_BE( cbnz has_nul, 1f )
+CPU_BE( cmp data1, data2 )
+CPU_BE( cset result, ne )
+CPU_BE( cneg result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+ /*Re-compute the NUL-byte detection, using a byte-reversed value. */
+CPU_BE( rev tmp3, data1 )
+CPU_BE( sub tmp1, tmp3, zeroones )
+CPU_BE( orr tmp2, tmp3, #REP8_7f )
+CPU_BE( bic has_nul, tmp1, tmp2 )
+CPU_BE( rev has_nul, has_nul )
+CPU_BE( orr syndrome, diff, has_nul )
+
+ clz pos, syndrome
+ /*
+ * The MS-non-zero bit of the syndrome marks either the first bit
+ * that is different, or the top bit of the first zero byte.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * But we need to zero-extend (char is unsigned) the value and then
+ * perform a signed 32-bit subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+ENDPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
new file mode 100644
index 000000000000..987b68b9ce44
--- /dev/null
+++ b/arch/arm64/lib/strlen.S
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * calculate the length of a string
+ *
+ * Parameters:
+ * x0 - const string pointer
+ * Returns:
+ * x0 - the return length of specific string
+ */
+
+/* Arguments and results. */
+srcin .req x0
+len .req x0
+
+/* Locals and temporaries. */
+src .req x1
+data1 .req x2
+data2 .req x3
+data2a .req x4
+has_nul1 .req x5
+has_nul2 .req x6
+tmp1 .req x7
+tmp2 .req x8
+tmp3 .req x9
+tmp4 .req x10
+zeroones .req x11
+pos .req x12
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strlen)
+ mov zeroones, #REP8_01
+ bic src, srcin, #15
+ ands tmp1, srcin, #15
+ b.ne .Lmisaligned
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+ /*
+ * The inner loop deals with two Dwords at a time. This has a
+ * slightly higher start-up cost, but we should win quite quickly,
+ * especially on cores with a high number of issue slots per
+ * cycle, as we get much better parallelism out of the operations.
+ */
+.Lloop:
+ ldp data1, data2, [src], #16
+.Lrealigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, #REP8_7f
+ bic has_nul1, tmp1, tmp2
+ bics has_nul2, tmp3, tmp4
+ ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
+ b.eq .Lloop
+
+ sub len, src, srcin
+ cbz has_nul1, .Lnul_in_data2
+CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/
+ sub len, len, #8
+ mov has_nul2, has_nul1
+.Lnul_in_data2:
+ /*
+ * For big-endian, carry propagation (if the final byte in the
+ * string is 0x01) means we cannot use has_nul directly. The
+ * easiest way to get the correct byte is to byte-swap the data
+ * and calculate the syndrome a second time.
+ */
+CPU_BE( rev data2, data2 )
+CPU_BE( sub tmp1, data2, zeroones )
+CPU_BE( orr tmp2, data2, #REP8_7f )
+CPU_BE( bic has_nul2, tmp1, tmp2 )
+
+ sub len, len, #8
+ rev has_nul2, has_nul2
+ clz pos, has_nul2
+ add len, len, pos, lsr #3 /* Bits to bytes. */
+ ret
+
+.Lmisaligned:
+ cmp tmp1, #8
+ neg tmp1, tmp1
+ ldp data1, data2, [src], #16
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+
+ orr data1, data1, tmp2
+ orr data2a, data2, tmp2
+ csinv data1, data1, xzr, le
+ csel data2, data2, data2a, le
+ b .Lrealigned
+ENDPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
new file mode 100644
index 000000000000..0224cf5a5533
--- /dev/null
+++ b/arch/arm64/lib/strncmp.S
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * compare two strings
+ *
+ * Parameters:
+ * x0 - const string 1 pointer
+ * x1 - const string 2 pointer
+ * x2 - the maximal length to be compared
+ * Returns:
+ * x0 - an integer less than, equal to, or greater than zero if s1 is found,
+ * respectively, to be less than, to match, or be greater than s2.
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result. */
+src1 .req x0
+src2 .req x1
+limit .req x2
+result .req x0
+
+/* Internal variables. */
+data1 .req x3
+data1w .req w3
+data2 .req x4
+data2w .req w4
+has_nul .req x5
+diff .req x6
+syndrome .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+zeroones .req x11
+pos .req x12
+limit_wd .req x13
+mask .req x14
+endloop .req x15
+
+ENTRY(strncmp)
+ cbz limit, .Lret0
+ eor tmp1, src1, src2
+ mov zeroones, #REP8_01
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ /* Calculate the number of full and partial words -1. */
+ /*
+ * when limit is mulitply of 8, if not sub 1,
+ * the judgement of last dword will wrong.
+ */
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, pl /* Last Dword or differences.*/
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp endloop, #0, #0, eq
+ b.eq .Lloop_aligned
+
+ /*Not reached the limit, must have found the end or a diff. */
+ tbz limit_wd, #63, .Lnot_limit
+
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+
+ lsl limit, limit, #3 /* Bits -> bytes. */
+ mov mask, #~0
+CPU_BE( lsr mask, mask, limit )
+CPU_LE( lsl mask, mask, limit )
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ /* Make sure that the NUL byte is marked in the syndrome. */
+ orr has_nul, has_nul, mask
+
+.Lnot_limit:
+ orr syndrome, diff, has_nul
+ b .Lcal_cmpresult
+
+.Lmutual_align:
+ /*
+ * Sources are mutually aligned, but are not currently at an
+ * alignment boundary. Round down the addresses and then mask off
+ * the bytes that precede the start point.
+ * We also need to adjust the limit calculations, but without
+ * overflowing if the limit is near ULONG_MAX.
+ */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ ldr data1, [src1], #8
+ neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
+
+ and tmp3, limit_wd, #7
+ lsr limit_wd, limit_wd, #3
+ /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
+ add limit, limit, tmp1
+ add tmp3, tmp3, tmp1
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ add limit_wd, limit_wd, tmp3, lsr #3
+ b .Lstart_realigned
+
+/*when src1 offset is not equal to src2 offset...*/
+.Lmisaligned8:
+ cmp limit, #8
+ b.lo .Ltiny8proc /*limit < 8... */
+ /*
+ * Get the align offset length to compare per byte first.
+ * After this process, one string's address will be aligned.*/
+ and tmp1, src1, #7
+ neg tmp1, tmp1
+ add tmp1, tmp1, #8
+ and tmp2, src2, #7
+ neg tmp2, tmp2
+ add tmp2, tmp2, #8
+ subs tmp3, tmp1, tmp2
+ csel pos, tmp1, tmp2, hi /*Choose the maximum. */
+ /*
+ * Here, limit is not less than 8, so directly run .Ltinycmp
+ * without checking the limit.*/
+ sub limit, limit, pos
+.Ltinycmp:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs pos, pos, #1
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltinycmp
+ cbnz pos, 1f /*find the null or unequal...*/
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs
+ b.eq .Lstart_align /*the last bytes are equal....*/
+1:
+ sub result, data1, data2
+ ret
+
+.Lstart_align:
+ lsr limit_wd, limit, #3
+ cbz limit_wd, .Lremain8
+ /*process more leading bytes to make str1 aligned...*/
+ ands xzr, src1, #7
+ b.eq .Lrecal_offset
+ add src1, src1, tmp3 /*tmp3 is positive in this branch.*/
+ add src2, src2, tmp3
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ sub limit, limit, tmp3
+ lsr limit_wd, limit, #3
+ subs limit_wd, limit_wd, #1
+
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ bics has_nul, tmp1, tmp2
+ ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+ b.ne .Lunequal_proc
+ /*How far is the current str2 from the alignment boundary...*/
+ and tmp3, tmp3, #7
+.Lrecal_offset:
+ neg pos, tmp3
+.Lloopcmp_proc:
+ /*
+ * Divide the eight bytes into two parts. First,backwards the src2
+ * to an alignment boundary,load eight bytes from the SRC2 alignment
+ * boundary,then compare with the relative bytes from SRC1.
+ * If all 8 bytes are equal,then start the second part's comparison.
+ * Otherwise finish the comparison.
+ * This special handle can garantee all the accesses are in the
+ * thread/task space in avoid to overrange access.
+ */
+ ldr data1, [src1,pos]
+ ldr data2, [src2,pos]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, eq
+ cbnz endloop, .Lunequal_proc
+
+ /*The second part process*/
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ subs limit_wd, limit_wd, #1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
+ bics has_nul, tmp1, tmp2
+ ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
+ b.eq .Lloopcmp_proc
+
+.Lunequal_proc:
+ orr syndrome, diff, has_nul
+ cbz syndrome, .Lremain8
+.Lcal_cmpresult:
+ /*
+ * reversed the byte-order as big-endian,then CLZ can find the most
+ * significant zero bits.
+ */
+CPU_LE( rev syndrome, syndrome )
+CPU_LE( rev data1, data1 )
+CPU_LE( rev data2, data2 )
+ /*
+ * For big-endian we cannot use the trick with the syndrome value
+ * as carry-propagation can corrupt the upper bits if the trailing
+ * bytes in the string contain 0x01.
+ * However, if there is no NUL byte in the dword, we can generate
+ * the result directly. We can't just subtract the bytes as the
+ * MSB might be significant.
+ */
+CPU_BE( cbnz has_nul, 1f )
+CPU_BE( cmp data1, data2 )
+CPU_BE( cset result, ne )
+CPU_BE( cneg result, result, lo )
+CPU_BE( ret )
+CPU_BE( 1: )
+ /* Re-compute the NUL-byte detection, using a byte-reversed value.*/
+CPU_BE( rev tmp3, data1 )
+CPU_BE( sub tmp1, tmp3, zeroones )
+CPU_BE( orr tmp2, tmp3, #REP8_7f )
+CPU_BE( bic has_nul, tmp1, tmp2 )
+CPU_BE( rev has_nul, has_nul )
+CPU_BE( orr syndrome, diff, has_nul )
+ /*
+ * The MS-non-zero bit of the syndrome marks either the first bit
+ * that is different, or the top bit of the first zero byte.
+ * Shifting left now will bring the critical information into the
+ * top bits.
+ */
+ clz pos, syndrome
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /*
+ * But we need to zero-extend (char is unsigned) the value and then
+ * perform a signed 32-bit subtraction.
+ */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+
+.Lremain8:
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq .Lret0
+.Ltiny8proc:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ subs limit, limit, #1
+
+ ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq .Ltiny8proc
+ sub result, data1, data2
+ ret
+
+.Lret0:
+ mov result, #0
+ ret
+ENDPROC(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
new file mode 100644
index 000000000000..2ca665711bf2
--- /dev/null
+++ b/arch/arm64/lib/strnlen.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * determine the length of a fixed-size string
+ *
+ * Parameters:
+ * x0 - const string pointer
+ * x1 - maximal string length
+ * Returns:
+ * x0 - the return length of specific string
+ */
+
+/* Arguments and results. */
+srcin .req x0
+len .req x0
+limit .req x1
+
+/* Locals and temporaries. */
+src .req x2
+data1 .req x3
+data2 .req x4
+data2a .req x5
+has_nul1 .req x6
+has_nul2 .req x7
+tmp1 .req x8
+tmp2 .req x9
+tmp3 .req x10
+tmp4 .req x11
+zeroones .req x12
+pos .req x13
+limit_wd .req x14
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY(strnlen)
+ cbz limit, .Lhit_limit
+ mov zeroones, #REP8_01
+ bic src, srcin, #15
+ ands tmp1, srcin, #15
+ b.ne .Lmisaligned
+ /* Calculate the number of full and partial words -1. */
+ sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
+
+ /*
+ * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ * can be done in parallel across the entire word.
+ */
+ /*
+ * The inner loop deals with two Dwords at a time. This has a
+ * slightly higher start-up cost, but we should win quite quickly,
+ * especially on cores with a high number of issue slots per
+ * cycle, as we get much better parallelism out of the operations.
+ */
+.Lloop:
+ ldp data1, data2, [src], #16
+.Lrealigned:
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, #REP8_7f
+ bic has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ subs limit_wd, limit_wd, #1
+ orr tmp1, has_nul1, has_nul2
+ ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
+ b.eq .Lloop
+
+ cbz tmp1, .Lhit_limit /* No null in final Qword. */
+
+ /*
+ * We know there's a null in the final Qword. The easiest thing
+ * to do now is work out the length of the string and return
+ * MIN (len, limit).
+ */
+ sub len, src, srcin
+ cbz has_nul1, .Lnul_in_data2
+CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/
+
+ sub len, len, #8
+ mov has_nul2, has_nul1
+.Lnul_in_data2:
+ /*
+ * For big-endian, carry propagation (if the final byte in the
+ * string is 0x01) means we cannot use has_nul directly. The
+ * easiest way to get the correct byte is to byte-swap the data
+ * and calculate the syndrome a second time.
+ */
+CPU_BE( rev data2, data2 )
+CPU_BE( sub tmp1, data2, zeroones )
+CPU_BE( orr tmp2, data2, #REP8_7f )
+CPU_BE( bic has_nul2, tmp1, tmp2 )
+
+ sub len, len, #8
+ rev has_nul2, has_nul2
+ clz pos, has_nul2
+ add len, len, pos, lsr #3 /* Bits to bytes. */
+ cmp len, limit
+ csel len, len, limit, ls /* Return the lower value. */
+ ret
+
+.Lmisaligned:
+ /*
+ * Deal with a partial first word.
+ * We're doing two things in parallel here;
+ * 1) Calculate the number of words (but avoiding overflow if
+ * limit is near ULONG_MAX) - to do this we need to work out
+ * limit + tmp1 - 1 as a 65-bit value before shifting it;
+ * 2) Load and mask the initial data words - we force the bytes
+ * before the ones we are interested in to 0xff - this ensures
+ * early bytes will not hit any zero detection.
+ */
+ ldp data1, data2, [src], #16
+
+ sub limit_wd, limit, #1
+ and tmp3, limit_wd, #15
+ lsr limit_wd, limit_wd, #4
+
+ add tmp3, tmp3, tmp1
+ add limit_wd, limit_wd, tmp3, lsr #4
+
+ neg tmp4, tmp1
+ lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
+
+ mov tmp2, #~0
+ /* Big-endian. Early bytes are at MSB. */
+CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
+ /* Little-endian. Early bytes are at LSB. */
+CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
+
+ cmp tmp1, #8
+
+ orr data1, data1, tmp2
+ orr data2a, data2, tmp2
+
+ csinv data1, data1, xzr, le
+ csel data2, data2, data2a, le
+ b .Lrealigned
+
+.Lhit_limit:
+ mov len, limit
+ ret
+ENDPROC(strnlen)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index fda756875fa6..23663837acff 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -31,7 +31,7 @@
* Corrupted registers: x0-x7, x9-x11
*/
__flush_dcache_all:
- dsb sy // ensure ordering with previous memory accesses
+ dmb sy // ensure ordering with previous memory accesses
mrs x0, clidr_el1 // read clidr
and x3, x0, #0x7000000 // extract loc from clidr
lsr x3, x3, #23 // left align loc bit field
@@ -128,7 +128,7 @@ USER(9f, dc cvau, x4 ) // clean D line to PoU
add x4, x4, x2
cmp x4, x1
b.lo 1b
- dsb sy
+ dsb ish
icache_line_size x2, x3
sub x3, x2, #1
@@ -139,7 +139,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
cmp x4, x1
b.lo 1b
9: // ignore any faulting cache operation
- dsb sy
+ dsb ish
isb
ret
ENDPROC(flush_icache_range)
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 2fc8258bab2d..023747bf4dd7 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -51,7 +51,11 @@ int pmd_huge(pmd_t pmd)
int pud_huge(pud_t pud)
{
+#ifndef __PAGETABLE_PMD_FOLDED
return !(pud_val(pud) & PUD_TABLE_BIT);
+#else
+ return 0;
+#endif
}
static __init int setup_hugepagesz(char *opt)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e085ee6ef4e2..c3c7b8f83b55 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -175,7 +175,7 @@ ENDPROC(cpu_do_switch_mm)
ENTRY(__cpu_setup)
ic iallu // I+BTB cache invalidate
tlbi vmalle1is // invalidate I + D TLBs
- dsb sy
+ dsb ish
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 90fd1195f276..3876c04feef9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,7 +36,7 @@ void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
-static int reboot_mode;
+static enum reboot_mode reboot_mode;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
@@ -88,11 +88,11 @@ static int __init reboot_setup(char *str)
switch (*str) {
case 'w':
- reboot_mode = 0x1234;
+ reboot_mode = REBOOT_WARM;
break;
case 'c':
- reboot_mode = 0;
+ reboot_mode = REBOOT_COLD;
break;
#ifdef CONFIG_SMP
@@ -552,6 +552,7 @@ static void native_machine_emergency_restart(void)
int i;
int attempt = 0;
int orig_reboot_type = reboot_type;
+ unsigned short mode;
if (reboot_emergency)
emergency_vmx_disable_all();
@@ -559,7 +560,8 @@ static void native_machine_emergency_restart(void)
tboot_shutdown(TB_SHUTDOWN_REBOOT);
/* Tell the BIOS if we want cold or warm reboot */
- *((unsigned short *)__va(0x472)) = reboot_mode;
+ mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
+ *((unsigned short *)__va(0x472)) = mode;
for (;;) {
/* Could also try the reset bit in the Hammer NB */
@@ -601,7 +603,7 @@ static void native_machine_emergency_restart(void)
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
- efi.reset_system(reboot_mode ?
+ efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 842d7ba83101..9625ce7ed5f8 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -31,26 +31,20 @@ config CPU_IDLE_GOV_MENU
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
-config OF_IDLE_STATES
- bool "Idle states DT support"
- depends on ARM || ARM64
- default n
- help
- Allows the CPU idle framework to initialize CPU idle drivers
- state data by using DT provided nodes compliant with idle states
- device tree bindings.
+config DT_IDLE_STATES
+ bool
if CPU_IDLE
+menu "ARM64 CPU Idle Drivers"
+depends on ARM64
+source "drivers/cpuidle/Kconfig.arm64"
+endmenu
+
config CPU_IDLE_CALXEDA
bool "CPU Idle Driver for Calxeda processors"
depends on ARCH_HIGHBANK
help
Select this to enable cpuidle on Calxeda processors.
-menu "ARM64 CPU Idle Drivers"
-depends on ARM64
-source "drivers/cpuidle/Kconfig.arm64"
-endmenu
-
endif
diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64
index b83612c67e6d..d0a08ed1b2ee 100644
--- a/drivers/cpuidle/Kconfig.arm64
+++ b/drivers/cpuidle/Kconfig.arm64
@@ -4,10 +4,11 @@
config ARM64_CPUIDLE
bool "Generic ARM64 CPU idle Driver"
- select OF_IDLE_STATES
+ select ARM64_CPU_SUSPEND
+ select DT_IDLE_STATES
help
- Select this to enable generic cpuidle driver for ARM v8.
+ Select this to enable generic cpuidle driver for ARM64.
It provides a generic idle driver whose idle states are configured
at run-time through DT nodes. The CPUidle suspend backend is
- initialized by the device tree parsing code on matching the entry
- method to the respective CPU operations.
+ initialized by calling the CPU operations init idle hook
+ provided by architecture code.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 2d97bcfecd00..0bd32cd03f0a 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -5,7 +5,7 @@
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o
-obj-$(CONFIG_OF_IDLE_STATES) += of_idle_states.o
+obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o
diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c
index 2cfde6ce3086..50997ea942fc 100644
--- a/drivers/cpuidle/cpuidle-arm64.c
+++ b/drivers/cpuidle/cpuidle-arm64.c
@@ -9,6 +9,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) "CPUidle arm64: " fmt
+
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
@@ -16,50 +18,23 @@
#include <linux/module.h>
#include <linux/of.h>
-#include <asm/psci.h>
+#include <asm/cpuidle.h>
#include <asm/suspend.h>
-#include "of_idle_states.h"
-
-typedef int (*suspend_init_fn)(struct cpuidle_driver *,
- struct device_node *[]);
-
-struct cpu_suspend_ops {
- const char *id;
- suspend_init_fn init_fn;
-};
-
-static const struct cpu_suspend_ops suspend_operations[] __initconst = {
- {"arm,psci", psci_dt_register_idle_states},
- {}
-};
-
-static __init const struct cpu_suspend_ops *get_suspend_ops(const char *str)
-{
- int i;
-
- if (!str)
- return NULL;
-
- for (i = 0; suspend_operations[i].id; i++)
- if (!strcmp(suspend_operations[i].id, str))
- return &suspend_operations[i];
-
- return NULL;
-}
+#include "dt_idle_states.h"
/*
- * arm_enter_idle_state - Programs CPU to enter the specified state
+ * arm64_enter_idle_state - Programs CPU to enter the specified state
*
- * @dev: cpuidle device
- * @drv: cpuidle driver
- * @idx: state index
+ * dev: cpuidle device
+ * drv: cpuidle driver
+ * idx: state index
*
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int arm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static int arm64_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
int ret;
@@ -68,30 +43,47 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,
return idx;
}
- cpu_pm_enter();
- /*
- * Pass idle state index to cpu_suspend which in turn will call
- * the CPU ops suspend protocol with idle index as a parameter.
- *
- * Some states would not require context to be saved and flushed
- * to DRAM, so calling cpu_suspend would not be stricly necessary.
- * When power domains specifications for ARM CPUs are finalized then
- * this code can be optimized to prevent saving registers if not
- * needed.
- */
- ret = cpu_suspend(idx);
+ ret = cpu_pm_enter();
+ if (!ret) {
+ /*
+ * Pass idle state index to cpu_suspend which in turn will
+ * call the CPU ops suspend protocol with idle index as a
+ * parameter.
+ */
+ ret = cpu_suspend(idx);
- cpu_pm_exit();
+ cpu_pm_exit();
+ }
return ret ? -1 : idx;
}
-struct cpuidle_driver arm64_idle_driver = {
+static struct cpuidle_driver arm64_idle_driver = {
.name = "arm64_idle",
.owner = THIS_MODULE,
+ /*
+ * State at index 0 is standby wfi and considered standard
+ * on all ARM platforms. If in some platforms simple wfi
+ * can't be used as "state 0", DT bindings must be implemented
+ * to work around this issue and allow installing a special
+ * handler for idle state index 0.
+ */
+ .states[0] = {
+ .enter = arm64_enter_idle_state,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .name = "WFI",
+ .desc = "ARM64 WFI",
+ }
};
-static struct device_node *state_nodes[CPUIDLE_STATE_MAX] __initdata;
+static const struct of_device_id arm64_idle_state_match[] __initconst = {
+ { .compatible = "arm,idle-state",
+ .data = arm64_enter_idle_state },
+ { },
+};
/*
* arm64_idle_init
@@ -102,58 +94,40 @@ static struct device_node *state_nodes[CPUIDLE_STATE_MAX] __initdata;
*/
static int __init arm64_idle_init(void)
{
- int i, ret;
- const char *entry_method;
- struct device_node *idle_states_node;
- const struct cpu_suspend_ops *suspend_init;
+ int cpu, ret;
struct cpuidle_driver *drv = &arm64_idle_driver;
- idle_states_node = of_find_node_by_path("/cpus/idle-states");
- if (!idle_states_node)
- return -ENOENT;
-
- if (of_property_read_string(idle_states_node, "entry-method",
- &entry_method)) {
- pr_warn(" * %s missing entry-method property\n",
- idle_states_node->full_name);
- of_node_put(idle_states_node);
- return -EOPNOTSUPP;
- }
-
- suspend_init = get_suspend_ops(entry_method);
- if (!suspend_init) {
- pr_warn("Missing suspend initializer\n");
- of_node_put(idle_states_node);
- return -EOPNOTSUPP;
- }
-
/*
- * State at index 0 is standby wfi and considered standard
- * on all ARM platforms. If in some platforms simple wfi
- * can't be used as "state 0", DT bindings must be implemented
- * to work around this issue and allow installing a special
- * handler for idle state index 0.
+ * Initialize idle states data, starting at index 1.
+ * This driver is DT only, if no DT idle states are detected (ret == 0)
+ * let the driver initialization fail accordingly since there is no
+ * reason to initialize the idle driver if only wfi is supported.
*/
- drv->states[0].exit_latency = 1;
- drv->states[0].target_residency = 1;
- drv->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
- strncpy(drv->states[0].name, "ARM WFI", CPUIDLE_NAME_LEN);
- strncpy(drv->states[0].desc, "ARM WFI", CPUIDLE_DESC_LEN);
+ ret = dt_init_idle_driver(drv, arm64_idle_state_match, 1);
+ if (ret <= 0) {
+ if (ret)
+ pr_err("failed to initialize idle states\n");
+ return ret ? : -ENODEV;
+ }
- drv->cpumask = (struct cpumask *) cpu_possible_mask;
/*
- * Start at index 1, request idle state nodes to be filled
+ * Call arch CPU operations in order to initialize
+ * idle states suspend back-end specific data
*/
- ret = of_init_idle_driver(drv, state_nodes, 1, true);
- if (ret)
- return ret;
-
- if (suspend_init->init_fn(drv, state_nodes))
- return -EOPNOTSUPP;
+ for_each_possible_cpu(cpu) {
+ ret = cpu_init_idle(cpu);
+ if (ret) {
+ pr_err("CPU %d failed to init idle CPU ops\n", cpu);
+ return ret;
+ }
+ }
- for (i = 0; i < drv->state_count; i++)
- drv->states[i].enter = arm_enter_idle_state;
+ ret = cpuidle_register(drv, NULL);
+ if (ret) {
+ pr_err("failed to register cpuidle driver\n");
+ return ret;
+ }
- return cpuidle_register(drv, NULL);
+ return 0;
}
device_initcall(arm64_idle_init);
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
new file mode 100644
index 000000000000..52f4d11bbf3f
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -0,0 +1,213 @@
+/*
+ * DT idle states parsing code.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "DT idle-states: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "dt_idle_states.h"
+
+static int init_state_node(struct cpuidle_state *idle_state,
+ const struct of_device_id *matches,
+ struct device_node *state_node)
+{
+ int err;
+ const struct of_device_id *match_id;
+
+ match_id = of_match_node(matches, state_node);
+ if (!match_id)
+ return -ENODEV;
+ /*
+ * CPUidle drivers are expected to initialize the const void *data
+ * pointer of the passed in struct of_device_id array to the idle
+ * state enter function.
+ */
+ idle_state->enter = match_id->data;
+
+ err = of_property_read_u32(state_node, "wakeup-latency-us",
+ &idle_state->exit_latency);
+ if (err) {
+ u32 entry_latency, exit_latency;
+
+ err = of_property_read_u32(state_node, "entry-latency-us",
+ &entry_latency);
+ if (err) {
+ pr_debug(" * %s missing entry-latency-us property\n",
+ state_node->full_name);
+ return -EINVAL;
+ }
+
+ err = of_property_read_u32(state_node, "exit-latency-us",
+ &exit_latency);
+ if (err) {
+ pr_debug(" * %s missing exit-latency-us property\n",
+ state_node->full_name);
+ return -EINVAL;
+ }
+ /*
+ * If wakeup-latency-us is missing, default to entry+exit
+ * latencies as defined in idle states bindings
+ */
+ idle_state->exit_latency = entry_latency + exit_latency;
+ }
+
+ err = of_property_read_u32(state_node, "min-residency-us",
+ &idle_state->target_residency);
+ if (err) {
+ pr_debug(" * %s missing min-residency-us property\n",
+ state_node->full_name);
+ return -EINVAL;
+ }
+
+ idle_state->flags = CPUIDLE_FLAG_TIME_VALID;
+ if (of_property_read_bool(state_node, "local-timer-stop"))
+ idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+ /*
+ * TODO:
+ * replace with kstrdup and pointer assignment when name
+ * and desc become string pointers
+ */
+ strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN - 1);
+ strncpy(idle_state->desc, state_node->name, CPUIDLE_DESC_LEN - 1);
+ return 0;
+}
+
+/*
+ * Check that the idle state is uniform across all CPUs in the CPUidle driver
+ * cpumask
+ */
+static bool idle_state_valid(struct device_node *state_node, unsigned int idx,
+ const cpumask_t *cpumask)
+{
+ int cpu;
+ struct device_node *cpu_node, *curr_state_node;
+ bool valid = true;
+
+ /*
+ * Compare idle state phandles for index idx on all CPUs in the
+ * CPUidle driver cpumask. Start from next logical cpu following
+ * cpumask_first(cpumask) since that's the CPU state_node was
+ * retrieved from. If a mismatch is found bail out straight
+ * away since we certainly hit a firmware misconfiguration.
+ */
+ for (cpu = cpumask_next(cpumask_first(cpumask), cpumask);
+ cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpumask)) {
+ cpu_node = of_cpu_device_node_get(cpu);
+ curr_state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+ idx);
+ if (state_node != curr_state_node)
+ valid = false;
+
+ of_node_put(curr_state_node);
+ of_node_put(cpu_node);
+ if (!valid)
+ break;
+ }
+
+ return valid;
+}
+
+/**
+ * dt_init_idle_driver() - Parse the DT idle states and initialize the
+ * idle driver states array
+ * @drv: Pointer to CPU idle driver to be initialized
+ * @matches: Array of of_device_id match structures to search in for
+ * compatible idle state nodes. The data pointer for each valid
+ * struct of_device_id entry in the matches array must point to
+ * a function with the following signature, that corresponds to
+ * the CPUidle state enter function signature:
+ *
+ * int (*)(struct cpuidle_device *dev,
+ * struct cpuidle_driver *drv,
+ * int index);
+ *
+ * @start_idx: First idle state index to be initialized
+ *
+ * If DT idle states are detected and are valid the state count and states
+ * array entries in the cpuidle driver are initialized accordingly starting
+ * from index start_idx.
+ *
+ * Return: number of valid DT idle states parsed, <0 on failure
+ */
+int dt_init_idle_driver(struct cpuidle_driver *drv,
+ const struct of_device_id *matches,
+ unsigned int start_idx)
+{
+ struct cpuidle_state *idle_state;
+ struct device_node *state_node, *cpu_node;
+ int i, err = 0;
+ const cpumask_t *cpumask;
+ unsigned int state_idx = start_idx;
+
+ if (state_idx >= CPUIDLE_STATE_MAX)
+ return -EINVAL;
+ /*
+ * We get the idle states for the first logical cpu in the
+ * driver mask (or cpu_possible_mask if the driver cpumask is not set)
+ * and we check through idle_state_valid() if they are uniform
+ * across CPUs, otherwise we hit a firmware misconfiguration.
+ */
+ cpumask = drv->cpumask ? : cpu_possible_mask;
+ cpu_node = of_cpu_device_node_get(cpumask_first(cpumask));
+
+ for (i = 0; ; i++) {
+ state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+ if (!state_node)
+ break;
+
+ if (!idle_state_valid(state_node, i, cpumask)) {
+ pr_warn("%s idle state not valid, bailing out\n",
+ state_node->full_name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (state_idx == CPUIDLE_STATE_MAX) {
+ pr_warn("State index reached static CPU idle driver states array size\n");
+ break;
+ }
+
+ idle_state = &drv->states[state_idx++];
+ err = init_state_node(idle_state, matches, state_node);
+ if (err) {
+ pr_err("Parsing idle state node %s failed with err %d\n",
+ state_node->full_name, err);
+ err = -EINVAL;
+ break;
+ }
+ of_node_put(state_node);
+ }
+
+ of_node_put(state_node);
+ of_node_put(cpu_node);
+ if (err)
+ return err;
+ /*
+ * Update the driver state count only if some valid DT idle states
+ * were detected
+ */
+ if (i)
+ drv->state_count = state_idx;
+
+ /*
+ * Return the number of present and valid DT idle states, which can
+ * also be 0 on platforms with missing DT idle states or legacy DT
+ * configuration predating the DT idle states bindings.
+ */
+ return i;
+}
+EXPORT_SYMBOL_GPL(dt_init_idle_driver);
diff --git a/drivers/cpuidle/dt_idle_states.h b/drivers/cpuidle/dt_idle_states.h
new file mode 100644
index 000000000000..4818134bc65b
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_states.h
@@ -0,0 +1,7 @@
+#ifndef __DT_IDLE_STATES
+#define __DT_IDLE_STATES
+
+int dt_init_idle_driver(struct cpuidle_driver *drv,
+ const struct of_device_id *matches,
+ unsigned int start_idx);
+#endif
diff --git a/drivers/cpuidle/of_idle_states.c b/drivers/cpuidle/of_idle_states.c
deleted file mode 100644
index eceb1b4c4657..000000000000
--- a/drivers/cpuidle/of_idle_states.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * OF idle states parsing code.
- *
- * Copyright (C) 2014 ARM Ltd.
- * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpuidle.h>
-#include <linux/cpumask.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/list_sort.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-
-#include "of_idle_states.h"
-
-struct state_elem {
- struct list_head list;
- struct device_node *node;
- int val;
-};
-
-static struct list_head head __initdata = LIST_HEAD_INIT(head);
-
-static bool __init state_cpu_valid(struct device_node *state_node,
- struct device_node *cpu_node)
-{
- int i = 0;
- struct device_node *cpu_state;
-
- while ((cpu_state = of_parse_phandle(cpu_node,
- "cpu-idle-states", i++))) {
- if (cpu_state && state_node == cpu_state) {
- of_node_put(cpu_state);
- return true;
- }
- of_node_put(cpu_state);
- }
- return false;
-}
-
-static bool __init state_cpus_valid(const cpumask_t *cpus,
- struct device_node *state_node)
-{
- int cpu;
- struct device_node *cpu_node;
-
- /*
- * Check if state is valid on driver cpumask cpus
- */
- for_each_cpu(cpu, cpus) {
- cpu_node = of_get_cpu_node(cpu, NULL);
-
- if (!cpu_node) {
- pr_err("Missing device node for CPU %d\n", cpu);
- return false;
- }
-
- if (!state_cpu_valid(state_node, cpu_node))
- return false;
- }
-
- return true;
-}
-
-static int __init state_cmp(void *priv, struct list_head *a,
- struct list_head *b)
-{
- struct state_elem *ela, *elb;
-
- ela = container_of(a, struct state_elem, list);
- elb = container_of(b, struct state_elem, list);
-
- return ela->val - elb->val;
-}
-
-static int __init add_state_node(cpumask_t *cpumask,
- struct device_node *state_node)
-{
- struct state_elem *el;
- u32 val;
-
- pr_debug(" * %s...\n", state_node->full_name);
-
- if (!state_cpus_valid(cpumask, state_node))
- return -EINVAL;
- /*
- * Parse just the value required to sort the states.
- */
- if (of_property_read_u32(state_node, "min-residency-us",
- &val)) {
- pr_debug(" * %s missing min-residency-us property\n",
- state_node->full_name);
- return -EINVAL;
- }
-
- el = kmalloc(sizeof(*el), GFP_KERNEL);
- if (!el) {
- pr_err("%s failed to allocate memory\n", __func__);
- return -ENOMEM;
- }
-
- el->node = state_node;
- el->val = val;
- list_add_tail(&el->list, &head);
-
- return 0;
-}
-
-static void __init init_state_node(struct cpuidle_driver *drv,
- struct device_node *state_node,
- int *cnt)
-{
- struct cpuidle_state *idle_state;
-
- pr_debug(" * %s...\n", state_node->full_name);
-
- idle_state = &drv->states[*cnt];
-
- if (of_property_read_u32(state_node, "exit-latency-us",
- &idle_state->exit_latency)) {
- pr_debug(" * %s missing exit-latency-us property\n",
- state_node->full_name);
- return;
- }
-
- if (of_property_read_u32(state_node, "min-residency-us",
- &idle_state->target_residency)) {
- pr_debug(" * %s missing min-residency-us property\n",
- state_node->full_name);
- return;
- }
- /*
- * It is unknown to the idle driver if and when the tick_device
- * loses context when the CPU enters the idle states. To solve
- * this issue the tick device must be linked to a power domain
- * so that the idle driver can check on which states the device
- * loses its context. Current code takes the conservative choice
- * of defining the idle state as one where the tick device always
- * loses its context. On platforms where tick device never loses
- * its context (ie it is not a C3STOP device) this turns into
- * a nop. On platforms where the tick device does lose context in some
- * states, this code can be optimized, when power domain specifications
- * for ARM CPUs are finalized.
- */
- idle_state->flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
-
- strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN);
- strncpy(idle_state->desc, state_node->name, CPUIDLE_NAME_LEN);
-
- (*cnt)++;
-}
-
-static int __init init_idle_states(struct cpuidle_driver *drv,
- struct device_node *state_nodes[],
- unsigned int start_idx, bool init_nodes)
-{
- struct state_elem *el;
- struct list_head *curr, *tmp;
- unsigned int cnt = start_idx;
-
- list_for_each_entry(el, &head, list) {
- /*
- * Check if the init function has to fill the
- * state_nodes array on behalf of the CPUidle driver.
- */
- if (init_nodes)
- state_nodes[cnt] = el->node;
- /*
- * cnt is updated on return if a state was added.
- */
- init_state_node(drv, el->node, &cnt);
-
- if (cnt == CPUIDLE_STATE_MAX) {
- pr_warn("State index reached static CPU idle state limit\n");
- break;
- }
- }
-
- drv->state_count = cnt;
-
- list_for_each_safe(curr, tmp, &head) {
- list_del(curr);
- kfree(container_of(curr, struct state_elem, list));
- }
-
- /*
- * If no idle states are detected, return an error and let the idle
- * driver initialization fail accordingly.
- */
- return (cnt > start_idx) ? 0 : -ENODATA;
-}
-
-static void __init add_idle_states(struct cpuidle_driver *drv,
- struct device_node *idle_states)
-{
- struct device_node *state_node;
-
- for_each_child_of_node(idle_states, state_node) {
- if ((!of_device_is_compatible(state_node, "arm,idle-state"))) {
- pr_warn(" * %s: children of /cpus/idle-states must be \"arm,idle-state\" compatible\n",
- state_node->full_name);
- continue;
- }
- /*
- * If memory allocation fails, better bail out.
- * Initialized nodes are freed at initialization
- * completion in of_init_idle_driver().
- */
- if ((add_state_node(drv->cpumask, state_node) == -ENOMEM))
- break;
- }
- /*
- * Sort the states list before initializing the CPUidle driver
- * states array.
- */
- list_sort(NULL, &head, state_cmp);
-}
-
-/*
- * of_init_idle_driver - Parse the DT idle states and initialize the
- * idle driver states array
- *
- * @drv: Pointer to CPU idle driver to be initialized
- * @state_nodes: Array of struct device_nodes to be initialized if
- * init_nodes == true. Must be sized CPUIDLE_STATE_MAX
- * @start_idx: First idle state index to be initialized
- * @init_nodes: Boolean to request device nodes initialization
- *
- * Returns:
- * 0 on success
- * <0 on failure
- *
- * On success the states array in the cpuidle driver contains
- * initialized entries in the states array, starting from index start_idx.
- * If init_nodes == true, on success the state_nodes array is initialized
- * with idle state DT node pointers, starting from index start_idx,
- * in a 1:1 relation with the idle driver states array.
- */
-int __init of_init_idle_driver(struct cpuidle_driver *drv,
- struct device_node *state_nodes[],
- unsigned int start_idx, bool init_nodes)
-{
- struct device_node *idle_states_node;
- int ret;
-
- if (start_idx >= CPUIDLE_STATE_MAX) {
- pr_warn("State index exceeds static CPU idle driver states array size\n");
- return -EINVAL;
- }
-
- if (WARN(init_nodes && !state_nodes,
- "Requested nodes stashing in an invalid nodes container\n"))
- return -EINVAL;
-
- idle_states_node = of_find_node_by_path("/cpus/idle-states");
- if (!idle_states_node)
- return -ENOENT;
-
- add_idle_states(drv, idle_states_node);
-
- ret = init_idle_states(drv, state_nodes, start_idx, init_nodes);
-
- of_node_put(idle_states_node);
-
- return ret;
-}
diff --git a/drivers/cpuidle/of_idle_states.h b/drivers/cpuidle/of_idle_states.h
deleted file mode 100644
index 049f94ff4428..000000000000
--- a/drivers/cpuidle/of_idle_states.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __OF_IDLE_STATES
-#define __OF_IDLE_STATES
-
-int __init of_init_idle_driver(struct cpuidle_driver *drv,
- struct device_node *state_nodes[],
- unsigned int start_idx,
- bool init_nodes);
-#endif
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 901b7435e890..192c783c2ec6 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_OF_DEVICE_H
#define _LINUX_OF_DEVICE_H
+#include <linux/cpu.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h> /* temporary until merge */
@@ -43,6 +44,15 @@ static inline void of_device_node_put(struct device *dev)
of_node_put(dev->of_node);
}
+static inline struct device_node *of_cpu_device_node_get(int cpu)
+{
+ struct device *cpu_dev;
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ return NULL;
+ return of_node_get(cpu_dev->of_node);
+}
+
#else /* CONFIG_OF_DEVICE */
static inline int of_driver_match_device(struct device *dev,
@@ -67,6 +77,11 @@ static inline const struct of_device_id *of_match_device(
{
return NULL;
}
+
+static inline struct device_node *of_cpu_device_node_get(int cpu)
+{
+ return NULL;
+}
#endif /* CONFIG_OF_DEVICE */
#endif /* _LINUX_OF_DEVICE_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 23b36304cd88..2ca9ed7cfc9b 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -10,6 +10,11 @@
#define SYS_HALT 0x0002 /* Notify of system halt */
#define SYS_POWER_OFF 0x0003 /* Notify of system power off */
+enum reboot_mode {
+ REBOOT_COLD = 0,
+ REBOOT_WARM,
+};
+
extern int register_reboot_notifier(struct notifier_block *);
extern int unregister_reboot_notifier(struct notifier_block *);