Diffstat (limited to 'arch/arc/include')
-rw-r--r--   arch/arc/include/asm/Kbuild                 |    1
-rw-r--r--   arch/arc/include/asm/atomic-llsc.h          |   97
-rw-r--r--   arch/arc/include/asm/atomic-spinlock.h      |  102
-rw-r--r--   arch/arc/include/asm/atomic.h               |  552
-rw-r--r--   arch/arc/include/asm/atomic64-arcv2.h       |  250
-rw-r--r--   arch/arc/include/asm/barrier.h              |    9
-rw-r--r--   arch/arc/include/asm/bitops.h               |  247
-rw-r--r--   arch/arc/include/asm/cache.h                |    4
-rw-r--r--   arch/arc/include/asm/checksum.h             |    2
-rw-r--r--   arch/arc/include/asm/cmpxchg.h              |  299
-rw-r--r--   arch/arc/include/asm/elf.h                  |    2
-rw-r--r--   arch/arc/include/asm/entry-compact.h        |   35
-rw-r--r--   arch/arc/include/asm/highmem.h              |   26
-rw-r--r--   arch/arc/include/asm/hugepage.h             |    9
-rw-r--r--   arch/arc/include/asm/irqflags-compact.h     |   13
-rw-r--r--   arch/arc/include/asm/kmap_types.h           |   14
-rw-r--r--   arch/arc/include/asm/kprobes.h              |    2
-rw-r--r--   arch/arc/include/asm/linkage.h              |    8
-rw-r--r--   arch/arc/include/asm/mach_desc.h            |    2
-rw-r--r--   arch/arc/include/asm/mmu-arcv2.h            |  103
-rw-r--r--   arch/arc/include/asm/mmu.h                  |   87
-rw-r--r--   arch/arc/include/asm/mmu_context.h          |   45
-rw-r--r--   arch/arc/include/asm/mmzone.h               |   40
-rw-r--r--   arch/arc/include/asm/page.h                 |  107
-rw-r--r--   arch/arc/include/asm/perf_event.h           |  162
-rw-r--r--   arch/arc/include/asm/pgalloc.h              |   87
-rw-r--r--   arch/arc/include/asm/pgtable-bits-arcv2.h   |  149
-rw-r--r--   arch/arc/include/asm/pgtable-levels.h       |  187
-rw-r--r--   arch/arc/include/asm/pgtable.h              |  350
-rw-r--r--   arch/arc/include/asm/processor.h            |   41
-rw-r--r--   arch/arc/include/asm/ptrace.h               |   37
-rw-r--r--   arch/arc/include/asm/segment.h              |   21
-rw-r--r--   arch/arc/include/asm/setup.h                |   16
-rw-r--r--   arch/arc/include/asm/smp.h                  |   14
-rw-r--r--   arch/arc/include/asm/spinlock.h             |    6
-rw-r--r--   arch/arc/include/asm/switch_to.h            |    9
-rw-r--r--   arch/arc/include/asm/syscall.h              |    2
-rw-r--r--   arch/arc/include/asm/thread_info.h          |   16
-rw-r--r--   arch/arc/include/asm/tlb-mmu1.h             |  101
-rw-r--r--   arch/arc/include/asm/uaccess.h              |  102
-rw-r--r--   arch/arc/include/uapi/asm/bpf_perf_event.h  |    9
-rw-r--r--   arch/arc/include/uapi/asm/page.h            |    1
-rw-r--r--   arch/arc/include/uapi/asm/sigcontext.h      |    1
43 files changed, 1254 insertions, 2113 deletions
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 81f4edec0c2a..3c1afa524b9c 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += user.h
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
new file mode 100644
index 000000000000..1b0ffaeee16d
--- /dev/null
+++ b/arch/arc/include/asm/atomic-llsc.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_LLSC_H
+#define _ASM_ARC_ATOMIC_LLSC_H
+
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+#define ATOMIC_OP(op, asm_op) \
+static inline void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned int val; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+ : "cc"); \
+} \
+
+#define ATOMIC_OP_RETURN(op, asm_op) \
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned int val; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc"); \
+ \
+ return val; \
+}
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#define ATOMIC_FETCH_OP(op, asm_op) \
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned int val, orig; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[orig], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[orig], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val), \
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc"); \
+ \
+ return orig; \
+}
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_OP_RETURN(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
+
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define arch_atomic_andnot arch_atomic_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
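
For reference, the macro machinery above stamps out one function per operation; a minimal sketch of what ATOMIC_OP(add, add) expands to (illustration only, not part of the patch) looks like this:

    /* Roughly the expansion of ATOMIC_OP(add, add): an LLOCK/SCOND retry loop */
    static inline void arch_atomic_add(int i, atomic_t *v)
    {
            unsigned int val;

            __asm__ __volatile__(
            "1:     llock   %[val], [%[ctr]]        \n"     /* load-locked counter       */
            "       add     %[val], %[val], %[i]    \n"     /* apply the operation       */
            "       scond   %[val], [%[ctr]]        \n"     /* store iff still exclusive */
            "       bnz     1b                      \n"     /* lost exclusivity: retry   */
            : [val] "=&r" (val)
            : [ctr] "r" (&v->counter),
              [i] "ir" (i)
            : "cc");
    }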
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
new file mode 100644
index 000000000000..2c830347bfb4
--- /dev/null
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
+#define _ASM_ARC_ATOMIC_SPLOCK_H
+
+/*
+ * Non hardware assisted Atomic-R-M-W
+ * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ */
+
+static inline void arch_atomic_set(atomic_t *v, int i)
+{
+ /*
+ * Independent of hardware support, all of the atomic_xxx() APIs need
+ * to follow the same locking rules to make sure that a "hardware"
+ * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
+ * sequence
+ *
+ * Thus atomic_set() despite being 1 insn (and seemingly atomic)
+ * requires the locking.
+ */
+ unsigned long flags;
+
+ atomic_ops_lock(flags);
+ WRITE_ONCE(v->counter, i);
+ atomic_ops_unlock(flags);
+}
+
+#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op) \
+static inline void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ \
+ atomic_ops_lock(flags); \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned int temp; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ temp = v->counter; \
+ temp c_op i; \
+ v->counter = temp; \
+ atomic_ops_unlock(flags); \
+ \
+ return temp; \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned int orig; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ orig = v->counter; \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+ \
+ return orig; \
+}
+
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot arch_atomic_andnot
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
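
Likewise for the lock-based fallback above, a sketch of what ATOMIC_FETCH_OP(add, +=, add) expands to (illustration only), showing how the irq-disable/spinlock stands in for the missing LLOCK/SCOND:

    /* Roughly the expansion of ATOMIC_FETCH_OP(add, +=, add) */
    static inline int arch_atomic_fetch_add(int i, atomic_t *v)
    {
            unsigned long flags;
            unsigned int orig;

            atomic_ops_lock(flags);         /* irq-off (UP) or spinlock (SMP); implies smp_mb() */
            orig = v->counter;              /* old value to return */
            v->counter += i;                /* the c_op for "add" */
            atomic_ops_unlock(flags);

            return orig;
    }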
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 7298ce84762e..52ee51e1ff7c 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -14,544 +14,46 @@
#include <asm/barrier.h>
#include <asm/smp.h>
-#define ATOMIC_INIT(i) { (i) }
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
-#define atomic_read(v) READ_ONCE((v)->counter)
+#define arch_atomic_read(v) READ_ONCE((v)->counter)
#ifdef CONFIG_ARC_HAS_LLSC
-
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned int val; \
- \
- __asm__ __volatile__( \
- "1: llock %[val], [%[ctr]] \n" \
- " " #asm_op " %[val], %[val], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
- : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
- [i] "ir" (i) \
- : "cc"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned int val; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %[val], [%[ctr]] \n" \
- " " #asm_op " %[val], %[val], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val) \
- : [ctr] "r" (&v->counter), \
- [i] "ir" (i) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return val; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned int val, orig; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %[orig], [%[ctr]] \n" \
- " " #asm_op " %[val], %[orig], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val), \
- [orig] "=&r" (orig) \
- : [ctr] "r" (&v->counter), \
- [i] "ir" (i) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return orig; \
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-#ifndef CONFIG_SMP
-
- /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
+#include <asm/atomic-llsc.h>
#else
+#include <asm/atomic-spinlock.h>
+#endif
-static inline void atomic_set(atomic_t *v, int i)
-{
- /*
- * Independent of hardware support, all of the atomic_xxx() APIs need
- * to follow the same locking rules to make sure that a "hardware"
- * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
- * sequence
- *
- * Thus atomic_set() despite being 1 insn (and seemingly atomic)
- * requires the locking.
- */
- unsigned long flags;
+#define arch_atomic_cmpxchg(v, o, n) \
+({ \
+ arch_cmpxchg(&((v)->counter), (o), (n)); \
+})
- atomic_ops_lock(flags);
- WRITE_ONCE(v->counter, i);
- atomic_ops_unlock(flags);
-}
+#ifdef arch_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_relaxed(v, o, n) \
+({ \
+ arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \
+})
+#endif
-#define atomic_set_release(v, i) atomic_set((v), (i))
+#define arch_atomic_xchg(v, n) \
+({ \
+ arch_xchg(&((v)->counter), (n)); \
+})
+#ifdef arch_xchg_relaxed
+#define arch_atomic_xchg_relaxed(v, n) \
+({ \
+ arch_xchg_relaxed(&((v)->counter), (n)); \
+})
#endif
/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ * 64-bit atomics
*/
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- \
- atomic_ops_lock(flags); \
- v->counter c_op i; \
- atomic_ops_unlock(flags); \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- unsigned long temp; \
- \
- /* \
- * spin lock/unlock provides the needed smp_mb() before/after \
- */ \
- atomic_ops_lock(flags); \
- temp = v->counter; \
- temp c_op i; \
- v->counter = temp; \
- atomic_ops_unlock(flags); \
- \
- return temp; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- unsigned long orig; \
- \
- /* \
- * spin lock/unlock provides the needed smp_mb() before/after \
- */ \
- atomic_ops_lock(flags); \
- orig = v->counter; \
- v->counter c_op i; \
- atomic_ops_unlock(flags); \
- \
- return orig; \
-}
-
-#endif /* !CONFIG_ARC_HAS_LLSC */
-
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
-
-#define atomic_andnot atomic_andnot
-#define atomic_fetch_andnot atomic_fetch_andnot
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline int atomic_read(const atomic_t *v)
-{
- int temp;
-
- __asm__ __volatile__(
- " ld.di %0, [%1]"
- : "=r"(temp)
- : "r"(&v->counter)
- : "memory");
- return temp;
-}
-
-static inline void atomic_set(atomic_t *v, int i)
-{
- __asm__ __volatile__(
- " st.di %0,[%1]"
- :
- : "r"(i), "r"(&v->counter)
- : "memory");
-}
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(i), "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- temp c_op i; \
- \
- return temp; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return temp; \
-}
-
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
-#define atomic_sub(i, v) atomic_add(-(i), (v))
-#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
#ifdef CONFIG_GENERIC_ATOMIC64
-
#include <asm-generic/atomic64.h>
-
-#else /* Kconfig ensures this is only enabled with needed h/w assist */
-
-/*
- * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
- * - The address HAS to be 64-bit aligned
- * - There are 2 semantics involved here:
- * = exclusive implies no interim update between load/store to same addr
- * = both words are observed/updated together: this is guaranteed even
- * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
- * is NOT required to use LLOCKD+SCONDD, STD suffices
- */
-
-typedef struct {
- s64 __aligned(8) counter;
-} atomic64_t;
-
-#define ATOMIC64_INIT(a) { (a) }
-
-static inline s64 atomic64_read(const atomic64_t *v)
-{
- s64 val;
-
- __asm__ __volatile__(
- " ldd %0, [%1] \n"
- : "=r"(val)
- : "r"(&v->counter));
-
- return val;
-}
-
-static inline void atomic64_set(atomic64_t *v, s64 a)
-{
- /*
- * This could have been a simple assignment in "C" but would need
- * explicit volatile. Otherwise gcc optimizers could elide the store
- * which borked atomic64 self-test
- * In the inline asm version, memory clobber needed for exact same
- * reason, to tell gcc about the store.
- *
- * This however is not needed for sibling atomic64_add() etc since both
- * load/store are explicitly done in inline asm. As long as API is used
- * for each access, gcc has no way to optimize away any load/store
- */
- __asm__ __volatile__(
- " std %0, [%1] \n"
- :
- : "r"(a), "r"(&v->counter)
- : "memory");
-}
-
-#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(s64 a, atomic64_t *v) \
-{ \
- s64 val; \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%1] \n" \
- " " #op1 " %L0, %L0, %L2 \n" \
- " " #op2 " %H0, %H0, %H2 \n" \
- " scondd %0, [%1] \n" \
- " bnz 1b \n" \
- : "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); \
-} \
-
-#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
-{ \
- s64 val; \
- \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%1] \n" \
- " " #op1 " %L0, %L0, %L2 \n" \
- " " #op2 " %H0, %H0, %H2 \n" \
- " scondd %0, [%1] \n" \
- " bnz 1b \n" \
- : [val] "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
- \
- smp_mb(); \
- \
- return val; \
-}
-
-#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
-{ \
- s64 val, orig; \
- \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%2] \n" \
- " " #op1 " %L1, %L0, %L3 \n" \
- " " #op2 " %H1, %H0, %H3 \n" \
- " scondd %1, [%2] \n" \
- " bnz 1b \n" \
- : "=&r"(orig), "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
- \
- smp_mb(); \
- \
- return orig; \
-}
-
-#define ATOMIC64_OPS(op, op1, op2) \
- ATOMIC64_OP(op, op1, op2) \
- ATOMIC64_OP_RETURN(op, op1, op2) \
- ATOMIC64_FETCH_OP(op, op1, op2)
-
-#define atomic64_andnot atomic64_andnot
-#define atomic64_fetch_andnot atomic64_fetch_andnot
-
-ATOMIC64_OPS(add, add.f, adc)
-ATOMIC64_OPS(sub, sub.f, sbc)
-ATOMIC64_OPS(and, and, and)
-ATOMIC64_OPS(andnot, bic, bic)
-ATOMIC64_OPS(or, or, or)
-ATOMIC64_OPS(xor, xor, xor)
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-static inline s64
-atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
-{
- s64 prev;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " brne %L0, %L2, 2f \n"
- " brne %H0, %H2, 2f \n"
- " scondd %3, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev)
- : "r"(ptr), "ir"(expected), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return prev;
-}
-
-static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
-{
- s64 prev;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " scondd %2, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev)
- : "r"(ptr), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return prev;
-}
-
-/**
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-
-static inline s64 atomic64_dec_if_positive(atomic64_t *v)
-{
- s64 val;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
- " sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
- " brlt %H0, 0, 2f \n"
- " scondd %0, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(val)
- : "r"(&v->counter)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return val;
-}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-
-/**
- * atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
-static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
- s64 old, temp;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%2] \n"
- " brne %L0, %L4, 2f # continue to add since v != u \n"
- " breq.d %H0, %H4, 3f # return since v == u \n"
- "2: \n"
- " add.f %L1, %L0, %L3 \n"
- " adc %H1, %H0, %H3 \n"
- " scondd %1, [%2] \n"
- " bnz 1b \n"
- "3: \n"
- : "=&r"(old), "=&r" (temp)
- : "r"(&v->counter), "r"(a), "r"(u)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return old;
-}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-
-#endif /* !CONFIG_GENERIC_ATOMIC64 */
+#else
+#include <asm/atomic64-arcv2.h>
+#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
new file mode 100644
index 000000000000..c5a8010fdc97
--- /dev/null
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ * - The address HAS to be 64-bit aligned
+ */
+
+#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
+#define _ASM_ARC_ATOMIC64_ARCV2_H
+
+typedef struct {
+ s64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+ s64 val;
+
+ __asm__ __volatile__(
+ " ldd %0, [%1] \n"
+ : "=r"(val)
+ : "r"(&v->counter));
+
+ return val;
+}
+
+static inline void arch_atomic64_set(atomic64_t *v, s64 a)
+{
+ /*
+ * This could have been a simple assignment in "C" but would need
+ * explicit volatile. Otherwise gcc optimizers could elide the store
+ * which borked atomic64 self-test
+ * In the inline asm version, memory clobber needed for exact same
+ * reason, to tell gcc about the store.
+ *
+ * This however is not needed for sibling atomic64_add() etc since both
+ * load/store are explicitly done in inline asm. As long as API is used
+ * for each access, gcc has no way to optimize away any load/store
+ */
+ __asm__ __volatile__(
+ " std %0, [%1] \n"
+ :
+ : "r"(a), "r"(&v->counter)
+ : "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2) \
+static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
+{ \
+ s64 val; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); \
+} \
+
+#define ATOMIC64_OP_RETURN(op, op1, op2) \
+static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 val; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); /* memory clobber comes from smp_mb() */ \
+ \
+ return val; \
+}
+
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+
+#define ATOMIC64_FETCH_OP(op, op1, op2) \
+static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 val, orig; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%2] \n" \
+ " " #op1 " %L1, %L0, %L3 \n" \
+ " " #op2 " %H1, %H0, %H3 \n" \
+ " scondd %1, [%2] \n" \
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc"); /* memory clobber comes from smp_mb() */ \
+ \
+ return orig; \
+}
+
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_OP_RETURN(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#define arch_atomic64_andnot arch_atomic64_andnot
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline s64
+arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+{
+ s64 prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " brne %L0, %L2, 2f \n"
+ " brne %H0, %H2, 2f \n"
+ " scondd %3, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "ir"(expected), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+
+static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
+{
+ s64 prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " scondd %2, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+
+/**
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+ s64 val;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
+ " sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
+ " brlt %H0, 0, 2f \n"
+ " scondd %0, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(val)
+ : "r"(&v->counter)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return val;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+/**
+ * arch_atomic64_fetch_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if it was not @u.
+ * Returns the old value of @v
+ */
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+ s64 old, temp;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%2] \n"
+ " brne %L0, %L4, 2f # continue to add since v != u \n"
+ " breq.d %H0, %H4, 3f # return since v == u \n"
+ "2: \n"
+ " add.f %L1, %L0, %L3 \n"
+ " adc %H1, %H0, %H3 \n"
+ " scondd %1, [%2] \n"
+ " bnz 1b \n"
+ "3: \n"
+ : "=&r"(old), "=&r" (temp)
+ : "r"(&v->counter), "r"(a), "r"(u)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return old;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+
+#endif
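
The branchy LLOCKD/SCONDD sequence in arch_atomic64_fetch_add_unless() above is easier to follow against its C-level equivalent; a sketch (illustration only, built on the 64-bit cmpxchg defined in this same file):

    /* Semantics of arch_atomic64_fetch_add_unless(): add @a unless *v == @u */
    static inline s64 fetch_add_unless_equiv(atomic64_t *v, s64 a, s64 u)
    {
            s64 old;

            do {
                    old = arch_atomic64_read(v);
                    if (old == u)           /* matches @u: bail out without adding */
                            break;
            } while (arch_atomic64_cmpxchg(v, old, old + a) != old);

            return old;                     /* old value, whether or not the add happened */
    }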
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index 7823811e7cf5..4637de9e02fa 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -27,7 +27,7 @@
#define rmb() asm volatile("dmb 1\n" : : : "memory")
#define wmb() asm volatile("dmb 2\n" : : : "memory")
-#elif !defined(CONFIG_ARC_PLAT_EZNPS) /* CONFIG_ISA_ARCOMPACT */
+#else
/*
* ARCompact based cores (ARC700) only have SYNC instruction which is super
@@ -37,13 +37,6 @@
#define mb() asm volatile("sync\n" : : : "memory")
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#include <plat/ctop.h>
-
-#define mb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-#define rmb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory")
-
#endif
#include <asm-generic/barrier.h>
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 50eb3f64a77c..bdb7e190a294 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -14,242 +14,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
-#include <asm/barrier.h>
-#ifndef CONFIG_ARC_HAS_LLSC
-#include <asm/smp.h>
-#endif
-
-#ifdef CONFIG_ARC_HAS_LLSC
-
-/*
- * Hardware assisted Atomic-R-M-W
- */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned int temp; \
- \
- m += nr >> 5; \
- \
- nr &= 0x1f; \
- \
- __asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
- " " #asm_op " %0, %0, %2 \n" \
- " scond %0, [%1] \n" \
- " bnz 1b \n" \
- : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
- : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
- "ir"(nr) \
- : "cc"); \
-}
-
-/*
- * Semantically:
- * Test the bit
- * if clear
- * set it and return 0 (old value)
- * else
- * return 1 (old value).
- *
- * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
- * and the old value of bit is returned
- */
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old, temp; \
- \
- m += nr >> 5; \
- \
- nr &= 0x1f; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND themselves don't provide any such smenatic \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %0, [%2] \n" \
- " " #asm_op " %1, %0, %3 \n" \
- " scond %1, [%2] \n" \
- " bnz 1b \n" \
- : "=&r"(old), "=&r"(temp) \
- : "r"(m), "ir"(nr) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return (old & (1 << nr)) != 0; \
-}
-
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
-
-/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
- *
- * There's "significant" micro-optimization in writing our own variants of
- * bitops (over generic variants)
- *
- * (1) The generic APIs have "signed" @nr while we have it "unsigned"
- * This avoids extra code to be generated for pointer arithmatic, since
- * is "not sure" that index is NOT -ve
- * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc
- * only consider bottom 5 bits of @nr, so NO need to mask them off.
- * (GCC Quirk: however for constant @nr we still need to do the masking
- * at compile time)
- */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long temp, flags; \
- m += nr >> 5; \
- \
- /* \
- * spin lock/unlock provide the needed smp_mb() before/after \
- */ \
- bitops_lock(flags); \
- \
- temp = *m; \
- *m = temp c_op (1UL << (nr & 0x1f)); \
- \
- bitops_unlock(flags); \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old, flags; \
- m += nr >> 5; \
- \
- bitops_lock(flags); \
- \
- old = *m; \
- *m = old c_op (1UL << (nr & 0x1f)); \
- \
- bitops_unlock(flags); \
- \
- return (old & (1UL << (nr & 0x1f))) != 0; \
-}
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- m += nr >> 5; \
- \
- nr = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- nr = ~nr; \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(nr), "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old; \
- \
- m += nr >> 5; \
- \
- nr = old = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- old = ~old; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(old) \
- : "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return (old & nr) != 0; \
-}
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
-/***************************************
- * Non atomic variants
- **************************************/
-
-#define __BIT_OP(op, c_op, asm_op) \
-static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
-{ \
- unsigned long temp; \
- m += nr >> 5; \
- \
- temp = *m; \
- *m = temp c_op (1UL << (nr & 0x1f)); \
-}
-
-#define __TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old; \
- m += nr >> 5; \
- \
- old = *m; \
- *m = old c_op (1UL << (nr & 0x1f)); \
- \
- return (old & (1UL << (nr & 0x1f))) != 0; \
-}
-
-#define BIT_OPS(op, c_op, asm_op) \
- \
- /* set_bit(), clear_bit(), change_bit() */ \
- BIT_OP(op, c_op, asm_op) \
- \
- /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
- TEST_N_BIT_OP(op, c_op, asm_op) \
- \
- /* __set_bit(), __clear_bit(), __change_bit() */ \
- __BIT_OP(op, c_op, asm_op) \
- \
- /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
- __TEST_N_BIT_OP(op, c_op, asm_op)
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-BIT_OPS(set, |, bset)
-BIT_OPS(clear, & ~, bclr)
-BIT_OPS(change, ^, bxor)
-#else
-BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3)
-BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3)
-BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3)
-#endif
-
-/*
- * This routine doesn't need to be atomic.
- */
-static inline int
-test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
- unsigned long mask;
-
- addr += nr >> 5;
-
- mask = 1UL << (nr & 0x1f);
-
- return ((mask & *addr) != 0);
-}
#ifdef CONFIG_ISA_ARCOMPACT
@@ -297,10 +61,8 @@ static inline int constant_fls(unsigned int x)
x <<= 2;
r -= 2;
}
- if (!(x & 0x80000000u)) {
- x <<= 1;
+ if (!(x & 0x80000000u))
r -= 1;
- }
return r;
}
@@ -352,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
* @result: [1-32]
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
*/
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
{
int n;
@@ -379,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
* ffs = Find First Set in word (LSB to MSB)
* @result: [1-32], 0 if all 0's
*/
-static inline __attribute__ ((const)) int ffs(unsigned long x)
+static inline __attribute__ ((const)) int ffs(unsigned int x)
{
int n;
@@ -424,8 +186,9 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
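
The constant_fls() hunk above drops a now-pointless final shift without changing results; a quick user-space sanity check of the documented contract fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 (illustration only, mirroring the kernel's constant_fls()):

    #include <stdio.h>

    static int constant_fls_demo(unsigned int x)
    {
            int r = 32;

            if (!x)
                    return 0;
            if (!(x & 0xffff0000u)) { x <<= 16; r -= 16; }
            if (!(x & 0xff000000u)) { x <<= 8;  r -= 8;  }
            if (!(x & 0xf0000000u)) { x <<= 4;  r -= 4;  }
            if (!(x & 0xc0000000u)) { x <<= 2;  r -= 2;  }
            if (!(x & 0x80000000u)) r -= 1;   /* final "x <<= 1" dropped, as in the patch */
            return r;
    }

    int main(void)
    {
            /* expected output: 1 32 0 */
            printf("%d %d %d\n", constant_fls_demo(1),
                   constant_fls_demo(0x80000000u), constant_fls_demo(0));
            return 0;
    }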
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index d8ece4292388..f0f1fc5d62b6 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -62,10 +62,6 @@
#define ARCH_SLAB_MINALIGN 8
#endif
-extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
-
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 69debd77cd04..0b485800a392 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -24,7 +24,7 @@
*/
static inline __sum16 csum_fold(__wsum s)
{
- unsigned r = s << 16 | s >> 16; /* ror */
+ unsigned int r = s << 16 | s >> 16; /* ror */
s = ~s;
s -= r;
return s >> 16;
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index c11398160240..c5b544a5fe81 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H
+#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/barrier.h>
@@ -13,212 +14,130 @@
#ifdef CONFIG_ARC_HAS_LLSC
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- unsigned long prev;
-
- /*
- * Explicit full memory barrier needed before/after as
- * LLOCK/SCOND thmeselves don't provide any such semantics
- */
- smp_mb();
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " brne %0, %2, 2f \n"
- " scond %3, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev) /* Early clobber, to prevent reg reuse */
- : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
- "ir"(expected),
- "r"(new) /* can't be "ir". scond can't take LIMM for "b" */
- : "cc", "memory"); /* so that gcc knows memory is being written here */
-
- smp_mb();
-
- return prev;
-}
-
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- unsigned long flags;
- int prev;
- volatile unsigned long *p = ptr;
-
- /*
- * spin lock/unlock provide the needed smp_mb() before/after
- */
- atomic_ops_lock(flags);
- prev = *p;
- if (prev == expected)
- *p = new;
- atomic_ops_unlock(flags);
- return prev;
-}
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- write_aux_reg(CTOP_AUX_GPA1, expected);
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2"
- : "+r"(new)
- : "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return new;
-}
-
-#endif /* CONFIG_ARC_HAS_LLSC */
-
-#define cmpxchg(ptr, o, n) ({ \
- (typeof(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n)); \
-})
-
-/*
- * atomic_cmpxchg is same as cmpxchg
- * LLSC: only different in data-type, semantics are exactly same
- * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
- * semantics, and this lock also happens to be used by atomic_*()
- */
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
/*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * if (*ptr == @old)
+ * *ptr = @new
*/
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
- int size)
-{
- extern unsigned long __xchg_bad_pointer(void);
-
- switch (size) {
- case 4:
- smp_mb();
+#define __cmpxchg(ptr, old, new) \
+({ \
+ __typeof__(*(ptr)) _prev; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%1] \n" \
+ " brne %0, %2, 2f \n" \
+ " scond %3, [%1] \n" \
+ " bnz 1b \n" \
+ "2: \n" \
+ : "=&r"(_prev) /* Early clobber prevent reg reuse */ \
+ : "r"(ptr), /* Not "m": llock only supports reg */ \
+ "ir"(old), \
+ "r"(new) /* Not "ir": scond can't take LIMM */ \
+ : "cc", \
+ "memory"); /* gcc knows memory is clobbered */ \
+ \
+ _prev; \
+})
- __asm__ __volatile__(
- " ex %0, [%1] \n"
- : "+r"(val)
- : "r"(ptr)
- : "memory");
+#define arch_cmpxchg_relaxed(ptr, old, new) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _o_ = (old); \
+ __typeof__(*(ptr)) _n_ = (new); \
+ __typeof__(*(ptr)) _prev_; \
+ \
+ switch(sizeof((_p_))) { \
+ case 4: \
+ _prev_ = __cmpxchg(_p_, _o_, _n_); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ _prev_; \
+})
- smp_mb();
+#else
- return val;
- }
- return __xchg_bad_pointer();
-}
+#define arch_cmpxchg(ptr, old, new) \
+({ \
+ volatile __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _o_ = (old); \
+ __typeof__(*(ptr)) _n_ = (new); \
+ __typeof__(*(ptr)) _prev_; \
+ unsigned long __flags; \
+ \
+ BUILD_BUG_ON(sizeof(_p_) != 4); \
+ \
+ /* \
+ * spin lock/unlock provide the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(__flags); \
+ _prev_ = *_p_; \
+ if (_prev_ == _o_) \
+ *_p_ = _n_; \
+ atomic_ops_unlock(__flags); \
+ _prev_; \
+})
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
- sizeof(*(ptr))))
+#endif
/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- * Hence xchg() needs to follow same locking rules.
- *
- * Technically the lock is also needed for UP (boils down to irq save/restore)
- * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
- * Other way around, xchg is one instruction anyways, so can't be interrupted
- * as such
+ * xchg
*/
+#ifdef CONFIG_ARC_HAS_LLSC
-#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
-
-#define xchg(ptr, with) \
-({ \
- unsigned long flags; \
- typeof(*(ptr)) old_val; \
- \
- atomic_ops_lock(flags); \
- old_val = _xchg(ptr, with); \
- atomic_ops_unlock(flags); \
- old_val; \
+#define __xchg(ptr, val) \
+({ \
+ __asm__ __volatile__( \
+ " ex %0, [%1] \n" /* set new value */ \
+ : "+r"(val) \
+ : "r"(ptr) \
+ : "memory"); \
+ _val_; /* get old value */ \
})
-#else
-
-#define xchg(ptr, with) _xchg(ptr, with)
-
-#endif
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
- int size)
-{
- extern unsigned long __xchg_bad_pointer(void);
-
- switch (size) {
- case 4:
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2\n"
- : "+r"(val)
- : "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return val;
- }
- return __xchg_bad_pointer();
-}
-
-#define xchg(ptr, with) ({ \
- (typeof(*(ptr)))__xchg((unsigned long)(with), \
- (ptr), \
- sizeof(*(ptr))); \
+#define arch_xchg_relaxed(ptr, val) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _val_ = (val); \
+ \
+ switch(sizeof(*(_p_))) { \
+ case 4: \
+ _val_ = __xchg(_p_, _val_); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ _val_; \
})
-#endif /* CONFIG_ARC_PLAT_EZNPS */
+#else /* !CONFIG_ARC_HAS_LLSC */
/*
- * "atomic" variant of xchg()
- * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
- * is incorrect. But here's the rationale:
- * SMP : Even xchg() takes the atomic_ops_lock, so OK.
- * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
- * is natively "SMP safe", no serialization required).
- * UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
- * could clobber them. atomic_xchg() itself would be 1 insn, so it
- * can't be clobbered by others. Thus no serialization required when
- * atomic_xchg is involved.
+ * The EX instruction is baseline and present in !LLSC too. But in this
+ * regime it still needs to use the @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses the spinlock in !LLSC
+ * (llist.h uses xchg and cmpxchg on the same data)
*/
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+#define arch_xchg(ptr, val) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _val_ = (val); \
+ \
+ unsigned long __flags; \
+ \
+ atomic_ops_lock(__flags); \
+ \
+ __asm__ __volatile__( \
+ " ex %0, [%1] \n" \
+ : "+r"(_val_) \
+ : "r"(_p_) \
+ : "memory"); \
+ \
+ atomic_ops_unlock(__flags); \
+ _val_; \
+})
+
+#endif
#endif
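
To see what these primitives are for, here is a sketch (illustration only; "struct demo_node" is a made-up type) of the kind of lockless caller, in the spirit of llist.h, that relies on cmpxchg() returning the value it observed:

    struct demo_node {
            struct demo_node *next;
    };

    static void demo_push(struct demo_node **head, struct demo_node *n)
    {
            struct demo_node *old;

            do {
                    old = READ_ONCE(*head);         /* snapshot the current head */
                    n->next = old;                  /* link the new node ahead of it */
                    /* retry if another CPU moved the head between the read and the swap */
            } while (cmpxchg(head, old, n) != old);
    }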
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index c77a0e3671ac..0284ace0e1ab 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -19,7 +19,7 @@
#define R_ARC_32_PCREL 0x31
/*to set parameters in the core dumps */
-#define ELF_ARCH EM_ARCOMPACT
+#define ELF_ARCH EM_ARC_INUSE
#define ELF_CLASS ELFCLASS32
#ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index c3aa775878dc..5aab4f93ab8a 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -33,10 +33,6 @@
#include <asm/irqflags-compact.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
-
/*--------------------------------------------------------------
* Switch to Kernel Mode stack if SP points to User Mode stack
*
@@ -130,19 +126,11 @@
* to be saved again on kernel mode stack, as part of pt_regs.
*-------------------------------------------------------------*/
.macro PROLOG_FREEUP_REG reg, mem
-#ifndef ARC_USE_SCRATCH_REG
- sr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
st \reg, [\mem]
-#endif
.endm
.macro PROLOG_RESTORE_REG reg, mem
-#ifndef ARC_USE_SCRATCH_REG
- lr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
ld \reg, [\mem]
-#endif
.endm
/*--------------------------------------------------------------
@@ -189,12 +177,6 @@
PUSHAX lp_start
PUSHAX erbta
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
-#endif
-
lr r10, [ecr]
st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */
.endm
@@ -211,11 +193,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX erbta
POPAX lp_start
@@ -278,11 +255,6 @@
PUSHAX lp_start
PUSHAX bta_l\LVL\()
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
-#endif
.endm
/*--------------------------------------------------------------
@@ -295,11 +267,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX bta_l\LVL\()
POPAX lp_start
@@ -327,13 +294,11 @@
bic \reg, sp, (THREAD_SIZE - 1)
.endm
-#ifndef CONFIG_ARC_PLAT_EZNPS
/* Get CPU-ID of this core */
.macro GET_CPU_ID reg
lr \reg, [identity]
lsr \reg, \reg, 8
bmsk \reg, \reg, 7
.endm
-#endif
#endif /* __ASM_ARC_ENTRY_COMPACT_H */
diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h
index 6e5eafb3afdd..a6b8e2c352c4 100644
--- a/arch/arc/include/asm/highmem.h
+++ b/arch/arc/include/asm/highmem.h
@@ -9,17 +9,29 @@
#ifdef CONFIG_HIGHMEM
#include <uapi/asm/page.h>
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
+
+#define FIXMAP_SIZE PGDIR_SIZE
+#define PKMAP_SIZE PGDIR_SIZE
/* start after vmalloc area */
#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE)
-#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */
-#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS)
-#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS)
+#define FIX_KMAP_BEGIN (0UL)
+#define FIX_KMAP_END ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1)
+
+#define FIXADDR_TOP (FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT))
+
+/*
+ * This should be converted to the asm-generic version, but of course this
+ * is needlessly different from all other architectures. Sigh - tglx
+ */
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT)
/* start after fixmap area */
#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE)
-#define PKMAP_SIZE PGDIR_SIZE
#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT)
#define LAST_PKMAP_MASK (LAST_PKMAP - 1)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
@@ -29,11 +41,13 @@
extern void kmap_init(void);
+#define arch_kmap_local_post_unmap(vaddr) \
+ local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE)
+
static inline void flush_cache_kmaps(void)
{
flush_cache_all();
}
-
#endif
#endif
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 4eef17c5c1da..5001b796fb8d 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -31,7 +31,6 @@ static inline pmd_t pte_pmd(pte_t pte)
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
-#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot))
@@ -58,14 +57,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
-/* Generic variants assume pgtable_t is struct page *, hence need for these */
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
-
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 7fc73fef5e29..0d63e568d64c 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -50,8 +50,12 @@
* are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
*
* Noted at the time of Abilis Timer List corruption
- * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
- * Reasoning : https://lkml.org/lkml/2013/4/8/15
+ *
+ * Orig Bug + Rejected solution:
+ * https://lore.kernel.org/lkml/1364553218-31255-1-git-send-email-vgupta@synopsys.com
+ *
+ * Reasoning:
+ * https://lore.kernel.org/lkml/CA+55aFyFWjpSVQM6M266tKrG_ZXJzZ-nYejpmXYQXbrr42mGPQ@mail.gmail.com
*
******************************************************************/
@@ -90,6 +94,9 @@ static inline void arch_local_irq_restore(unsigned long flags)
/*
* Unconditionally Enable IRQs
*/
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+extern void arch_local_irq_enable(void);
+#else
static inline void arch_local_irq_enable(void)
{
unsigned long temp;
@@ -102,7 +109,7 @@ static inline void arch_local_irq_enable(void)
: "n"((STATUS_E1_MASK | STATUS_E2_MASK))
: "cc", "memory");
}
-
+#endif
/*
* Unconditionally Disable IRQs
diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h
deleted file mode 100644
index fecf7851ec32..000000000000
--- a/arch/arc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/*
- * We primarily need to define KM_TYPE_NR here but that in turn
- * is a function of PGDIR_SIZE etc.
- * To avoid circular deps issue, put everything in asm/highmem.h
- */
-#endif
diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h
index 2134721dce44..de1566e32cb8 100644
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -46,7 +46,7 @@ struct kprobe_ctlblk {
};
int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
#else
#define trap_is_kprobe(address, regs)
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index fe19f1d412e7..c9434ff3aa4c 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -64,15 +64,15 @@
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_ARC_HAS_ICCM
-#define __arcfp_code __section(.text.arcfp)
+#define __arcfp_code __section(".text.arcfp")
#else
-#define __arcfp_code __section(.text)
+#define __arcfp_code __section(".text")
#endif
#ifdef CONFIG_ARC_HAS_DCCM
-#define __arcfp_data __section(.data.arcfp)
+#define __arcfp_data __section(".data.arcfp")
#else
-#define __arcfp_data __section(.data)
+#define __arcfp_data __section(".data")
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index 73746ed5b834..c4e197059379 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -53,7 +53,7 @@ extern const struct machine_desc __arch_info_begin[], __arch_info_end[];
*/
#define MACHINE_START(_type, _name) \
static const struct machine_desc __mach_desc_##_type \
-__used __section(.arch.info.init) = { \
+__used __section(".arch.info.init") = { \
.name = _name,
#define MACHINE_END \
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
new file mode 100644
index 000000000000..ed9036d4ede3
--- /dev/null
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
+ *
+ * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
+ * This file contains the TLB access registers and commands
+ */
+
+#ifndef _ASM_ARC_MMU_ARCV2_H
+#define _ASM_ARC_MMU_ARCV2_H
+
+/*
+ * TLB Management regs
+ */
+#define ARC_REG_MMU_BCR 0x06f
+
+#ifdef CONFIG_ARC_MMU_V3
+#define ARC_REG_TLBPD0 0x405
+#define ARC_REG_TLBPD1 0x406
+#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
+#define ARC_REG_TLBINDEX 0x407
+#define ARC_REG_TLBCOMMAND 0x408
+#define ARC_REG_PID 0x409
+#define ARC_REG_SCRATCH_DATA0 0x418
+#else
+#define ARC_REG_TLBPD0 0x460
+#define ARC_REG_TLBPD1 0x461
+#define ARC_REG_TLBPD1HI 0x463
+#define ARC_REG_TLBINDEX 0x464
+#define ARC_REG_TLBCOMMAND 0x465
+#define ARC_REG_PID 0x468
+#define ARC_REG_SCRATCH_DATA0 0x46c
+#endif
+
+/* Bits in MMU PID reg */
+#define __TLB_ENABLE (1 << 31)
+#define __PROG_ENABLE (1 << 30)
+#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
+
+/* Bits in TLB Index reg */
+#define TLB_LKUP_ERR 0x80000000
+
+#ifdef CONFIG_ARC_MMU_V3
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
+#endif
+
+/*
+ * TLB Commands
+ */
+#define TLBWrite 0x1
+#define TLBRead 0x2
+#define TLBGetIndex 0x3
+#define TLBProbe 0x4
+#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
+#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define TLBInsertEntry 0x7
+#define TLBDeleteEntry 0x8
+#endif
+
+/* Masks for actual TLB "PD"s */
+#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
+#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+extern int pae40_exist_but_not_enab(void);
+
+static inline int is_pae40_enabled(void)
+{
+ return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
+static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
+{
+ write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
+}
+
+static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
+{
+ /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
+#ifdef CONFIG_ISA_ARCV2
+ write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
+#endif
+}
+
+#else
+
+.macro ARC_MMU_REENABLE reg
+ lr \reg, [ARC_REG_PID]
+ or \reg, \reg, MMU_ENABLE
+ sr \reg, [ARC_REG_PID]
+.endm
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 26b731d32a2b..ca427c30f70e 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -7,98 +7,15 @@
#define _ASM_ARC_MMU_H
#ifndef __ASSEMBLY__
-#include <linux/threads.h> /* NR_CPUS */
-#endif
-
-#if defined(CONFIG_ARC_MMU_V1)
-#define CONFIG_ARC_MMU_VER 1
-#elif defined(CONFIG_ARC_MMU_V2)
-#define CONFIG_ARC_MMU_VER 2
-#elif defined(CONFIG_ARC_MMU_V3)
-#define CONFIG_ARC_MMU_VER 3
-#elif defined(CONFIG_ARC_MMU_V4)
-#define CONFIG_ARC_MMU_VER 4
-#endif
-
-/* MMU Management regs */
-#define ARC_REG_MMU_BCR 0x06f
-#if (CONFIG_ARC_MMU_VER < 4)
-#define ARC_REG_TLBPD0 0x405
-#define ARC_REG_TLBPD1 0x406
-#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
-#define ARC_REG_TLBINDEX 0x407
-#define ARC_REG_TLBCOMMAND 0x408
-#define ARC_REG_PID 0x409
-#define ARC_REG_SCRATCH_DATA0 0x418
-#else
-#define ARC_REG_TLBPD0 0x460
-#define ARC_REG_TLBPD1 0x461
-#define ARC_REG_TLBPD1HI 0x463
-#define ARC_REG_TLBINDEX 0x464
-#define ARC_REG_TLBCOMMAND 0x465
-#define ARC_REG_PID 0x468
-#define ARC_REG_SCRATCH_DATA0 0x46c
-#endif
-
-#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
-#define ARC_USE_SCRATCH_REG
-#endif
-
-/* Bits in MMU PID register */
-#define __TLB_ENABLE (1 << 31)
-#define __PROG_ENABLE (1 << 30)
-#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
-
-/* Error code if probe fails */
-#define TLB_LKUP_ERR 0x80000000
-
-#if (CONFIG_ARC_MMU_VER < 4)
-#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
-#else
-#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
-#endif
-
-/* TLB Commands */
-#define TLBWrite 0x1
-#define TLBRead 0x2
-#define TLBGetIndex 0x3
-#define TLBProbe 0x4
-
-#if (CONFIG_ARC_MMU_VER >= 2)
-#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
-#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
-#else
-#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
-#endif
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define TLBInsertEntry 0x7
-#define TLBDeleteEntry 0x8
-#endif
-#ifndef __ASSEMBLY__
+#include <linux/threads.h> /* NR_CPUS */
typedef struct {
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
#endif
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void read_decode_mmu_bcr(void);
-
-static inline int is_pae40_enabled(void)
-{
- return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
-}
-
-extern int pae40_exist_but_not_enab(void);
-
-#endif /* !__ASSEMBLY__ */
+#include <asm/mmu-arcv2.h>
#endif
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 3a5e6a5b9ed6..dda471f5f05b 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -15,22 +15,23 @@
#ifndef _ASM_ARC_MMU_CONTEXT_H
#define _ASM_ARC_MMU_CONTEXT_H
-#include <asm/arcregs.h>
-#include <asm/tlb.h>
#include <linux/sched/mm.h>
+#include <asm/tlb.h>
#include <asm-generic/mm_hooks.h>
-/* ARC700 ASID Management
+/* ARC ASID Management
+ *
+ * The MMU tags TLB entries with an 8-bit ASID, avoiding the need to flush
+ * the TLB on context-switch.
*
- * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
- * with same vaddr (different tasks) to co-exit. This provides for
- * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
+ * ASIDs are managed per cpu, so threads of a task running on different CPUs
+ * can have different ASIDs. Global ASID management would only be needed if
+ * hardware supported TLB shootdown and/or a shared TLB across cores, which
+ * ARC doesn't.
*
- * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cpu.
- * When it reaches max 255, the allocation cycle starts afresh by flushing
- * the entire TLB and wrapping ASID back to zero.
+ * Each task is assigned a unique ASID, via a simple round-robin allocator
+ * tracked in @asid_cpu. When the 8-bit value rolls over, a new cycle is
+ * started from 0 and the TLB is flushed.
*
* A new allocation cycle, post rollover, could potentially reassign an ASID
* to a different task. Thus the rule is to refresh the ASID in a new cycle.
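As a rough model of the rollover policy described above (the real allocator is get_new_mmu_context() below; demo_asid stands in for asid_cpu(cpu), and the real code additionally keeps a generation count in the upper bits and reserves 0 for "unallocated"):

static unsigned long demo_asid;

static unsigned long demo_get_new_asid(void)
{
	if (!(++demo_asid & 0xff))	/* 8-bit hw value wrapped around */
		local_flush_tlb_all();	/* stale ASID tags must not survive */

	return demo_asid & 0xff;	/* value that would go into the PID reg */
}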
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
asid_mm(mm, cpu) = asid_cpu(cpu);
set_hw:
- write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
+ mmu_setup_asid(mm, hw_pid(mm, cpu));
local_irq_restore(flags);
}
@@ -102,6 +103,7 @@ set_hw:
* Initialize the context related info for a new mm_struct
* instance.
*/
+#define init_new_context init_new_context
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
@@ -113,6 +115,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return 0;
}
+#define destroy_context destroy_context
static inline void destroy_context(struct mm_struct *mm)
{
unsigned long flags;
@@ -144,22 +147,19 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
-#ifdef ARC_USE_SCRATCH_REG
- /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
- write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+ mmu_setup_pgd(next, next->pgd);
get_new_mmu_context(next);
}
/*
- * Called at the time of execve() to get a new ASID
- * Note the subtlety here: get_new_mmu_context() behaves differently here
- * vs. in switch_mm(). Here it always returns a new ASID, because mm has
- * an unallocated "initial" value, while in latter, it moves to a new ASID,
- * only if it was unallocated
+ * activate_mm defaults (in asm-generic) to switch_mm and is called at the
+ * time of execve() to get a new ASID. Note the subtlety here:
+ * get_new_mmu_context() behaves differently here vs. in switch_mm(). Here
+ * it always returns a new ASID, because mm has an unallocated "initial"
+ * value, while in the latter it moves to a new ASID only if it was
+ * unallocated.
*/
-#define activate_mm(prev, next) switch_mm(prev, next, NULL)
/* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
* for retiring-mm. However destroy_context( ) still needs to do that because
@@ -168,8 +168,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* there is a good chance that task gets sched-out/in, making it's ASID valid
* again (this teased me for a whole day).
*/
-#define deactivate_mm(tsk, mm) do { } while (0)
-#define enter_lazy_tlb(mm, tsk)
+#include <asm-generic/mmu_context.h>
#endif /* __ASM_ARC_MMU_CONTEXT_H */
diff --git a/arch/arc/include/asm/mmzone.h b/arch/arc/include/asm/mmzone.h
deleted file mode 100644
index b86b9d1e54dc..000000000000
--- a/arch/arc/include/asm/mmzone.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_ARC_MMZONE_H
-#define _ASM_ARC_MMZONE_H
-
-#ifdef CONFIG_DISCONTIGMEM
-
-extern struct pglist_data node_data[];
-#define NODE_DATA(nid) (&node_data[nid])
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
- int is_end_low = 1;
-
- if (IS_ENABLED(CONFIG_ARC_HAS_PAE40))
- is_end_low = pfn <= virt_to_pfn(0xFFFFFFFFUL);
-
- /*
- * node 0: lowmem: 0x8000_0000 to 0xFFFF_FFFF
- * node 1: HIGHMEM w/o PAE40: 0x0 to 0x7FFF_FFFF
- * HIGHMEM with PAE40: 0x1_0000_0000 to ...
- */
- if (pfn >= ARCH_PFN_OFFSET && is_end_low)
- return 0;
-
- return 1;
-}
-
-static inline int pfn_valid(unsigned long pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- return (pfn <= node_end_pfn(nid));
-}
-#endif /* CONFIG_DISCONTIGMEM */
-
-#endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index b0dfed0f12be..9a62e1d87967 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,9 +7,22 @@
#include <uapi/asm/page.h>
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#define PAGE_MASK_PHYS PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
#ifndef __ASSEMBLY__
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
struct vm_area_struct;
@@ -21,57 +34,55 @@ void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma);
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct {
-#ifdef CONFIG_ARC_HAS_PAE40
- unsigned long long pte;
-#else
- unsigned long pte;
-#endif
-} pte_t;
typedef struct {
unsigned long pgd;
} pgd_t;
+
+#define pgd_val(x) ((x).pgd)
+#define __pgd(x) ((pgd_t) { (x) })
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
typedef struct {
- unsigned long pgprot;
-} pgprot_t;
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
-#define pte_val(x) ((x).pte)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
-#define __pte(x) ((pte_t) { (x) })
-#define __pgd(x) ((pgd_t) { (x) })
-#define __pgprot(x) ((pgprot_t) { (x) })
+typedef struct {
+ unsigned long pmd;
+} pmd_t;
-#define pte_pgprot(x) __pgprot(pte_val(x))
+#define pmd_val(x) ((x).pmd)
+#define __pmd(x) ((pmd_t) { (x) })
-#else /* !STRICT_MM_TYPECHECKS */
+#endif
+typedef struct {
#ifdef CONFIG_ARC_HAS_PAE40
-typedef unsigned long long pte_t;
+ unsigned long long pte;
#else
-typedef unsigned long pte_t;
+ unsigned long pte;
#endif
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
+} pte_t;
-#define pte_val(x) (x)
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-#define __pte(x) (x)
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-#define pte_pgprot(x) (x)
+#define pte_val(x) ((x).pte)
+#define __pte(x) ((pte_t) { (x) })
-#endif
+typedef struct {
+ unsigned long pgprot;
+} pgprot_t;
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+#define pte_pgprot(x) __pgprot(pte_val(x))
-typedef pte_t * pgtable_t;
+typedef struct page *pgtable_t;
/*
* Use virt_to_pfn with caution:
@@ -82,11 +93,25 @@ typedef pte_t * pgtable_t;
*/
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE)
+/*
+ * When HIGHMEM is enabled we have holes in the memory map, so we need a
+ * pfn_valid() that takes into account the actual extents of the physical
+ * memory
+ */
+#ifdef CONFIG_HIGHMEM
+
+extern unsigned long arch_pfn_offset;
+#define ARCH_PFN_OFFSET arch_pfn_offset
-#ifdef CONFIG_FLATMEM
+extern int pfn_valid(unsigned long pfn);
+#define pfn_valid pfn_valid
+
+#else /* CONFIG_HIGHMEM */
+
+#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE)
#define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
-#endif
+
+#endif /* CONFIG_HIGHMEM */
/*
* __pa, __va, virt_to_page (ALERT: deprecated, don't use them)
@@ -95,8 +120,8 @@ typedef pte_t * pgtable_t;
* virt here means link-address/program-address as embedded in object code.
* And for ARC, link-addr = physical address
*/
-#define __pa(vaddr) ((unsigned long)(vaddr))
-#define __va(paddr) ((void *)((unsigned long)(paddr)))
+#define __pa(vaddr) ((unsigned long)(vaddr))
+#define __va(paddr) ((void *)((unsigned long)(paddr)))
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
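One plausible shape for the out-of-line pfn_valid() declared above, assuming the boot code records the bounds of the second memory bank (min_high_pfn/max_high_pfn are illustrative names, not taken from the patch; min_low_pfn/max_low_pfn are the generic bootmem globals):

int pfn_valid(unsigned long pfn)
{
	/* first bank: contiguous lowmem starting at ARCH_PFN_OFFSET */
	if (pfn >= min_low_pfn && pfn <= max_low_pfn)
		return 1;

	/* second bank: sits beyond the hole in the memory map */
	return pfn >= min_high_pfn && pfn <= max_high_pfn;
}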
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 30b9ae511ea9..d5719a260864 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -63,166 +63,8 @@ struct arc_reg_cc_build {
#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8)
-/*
- * Some ARC pct quirks:
- *
- * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
- * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
- * The ARC 700 can either measure stalls per pipeline stage, or all stalls
- * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
- * and all pipeline flushes (e.g. caused by mispredicts, etc.) to
- * STALLED_CYCLES_FRONTEND.
- *
- * We could start multiple performance counters and combine everything
- * afterwards, but that makes it complicated.
- *
- * Note that I$ cache misses aren't counted by either of the two!
- */
-
-/*
- * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
- * (based on a specific RTL build)
- * Below is the static map between perf generic/arc specific event_id and
- * h/w condition names.
- * At the time of probe, we loop thru each index and find it's name to
- * complete the mapping of perf event_id to h/w index as latter is needed
- * to program the counter really
- */
-static const char * const arc_pmu_ev_hw_map[] = {
- /* count cycles */
- [PERF_COUNT_HW_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-
- /* counts condition */
- [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
- /* All jump instructions that are taken */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
-#ifdef CONFIG_ISA_ARCV2
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
-#else
- [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#endif
- [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
-
- [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
- [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
- [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
- [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
- [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
-
- [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
-};
-
-#define C(_x) PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED 0xffff
-
-static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
- },
- /* DTLB LD/ST Miss not segregated by h/w*/
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
-};
#endif /* __ASM_PERF_EVENT_H */
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index b747f2ec2928..096b8ef58edb 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -18,10 +18,10 @@
* vineetg: April 2010
* -Switched pgtable_t from being struct page * to unsigned long
* =Needed so that Page Table allocator (pte_alloc_one) is not forced to
- * to deal with struct page. Thay way in future we can make it allocate
+ * deal with struct page. That way in future we can make it allocate
* multiple PG Tbls in one Page Frame
* =sweet side effect is avoiding calls to ugly page_address( ) from the
- * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate
+ * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate)
*
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
@@ -31,30 +31,32 @@
#include <linux/mm.h>
#include <linux/log2.h>
+#include <asm-generic/pgalloc.h>
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
- pmd_set(pmd, pte);
+ /*
+ * The cast to long below is OK in the 32-bit PAE40 regime with a long long pte
+ * Despite "wider" pte, the pte table needs to be in non-PAE low memory
+ * as all higher levels can only hold long pointers.
+ *
+ * The cast itself is needed given the simplistic definition of set_pmd()
+ */
+ set_pmd(pmd, __pmd((unsigned long)pte));
}
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
{
- pmd_set(pmd, (pte_t *) ptep);
-}
-
-static inline int __get_order_pgd(void)
-{
- return get_order(PTRS_PER_PGD * sizeof(pgd_t));
+ set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- int num, num2;
- pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
+ pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
if (ret) {
+ int num, num2;
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
memzero(ret, num * sizeof(pgd_t));
@@ -68,67 +70,28 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return ret;
}
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_pages((unsigned long)pgd, __get_order_pgd());
-}
-
-
-/*
- * With software-only page-tables, addr-split for traversal is tweakable and
- * that directly governs how big tables would be at each level.
- * Further, the MMU page size is configurable.
- * Thus we need to programatically assert the size constraint
- * All of this is const math, allowing gcc to do constant folding/propagation.
- */
+#if CONFIG_PGTABLE_LEVELS > 3
-static inline int __get_order_pte(void)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- return get_order(PTRS_PER_PTE * sizeof(pte_t));
+ set_p4d(p4dp, __p4d((unsigned long)pudp));
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte;
+#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd)
- pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- __get_order_pte());
+#endif
- return pte;
-}
+#if CONFIG_PGTABLE_LEVELS > 2
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- pgtable_t pte_pg;
- struct page *page;
-
- pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
- if (!pte_pg)
- return 0;
- memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
- page = virt_to_page(pte_pg);
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
- return 0;
- }
-
- return pte_pg;
+ set_pud(pudp, __pud((unsigned long)pmdp));
}
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
-}
+#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
-{
- pgtable_pte_page_dtor(virt_to_page(ptep));
- free_pages((unsigned long)ptep, __get_order_pte());
-}
+#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
-#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
-
#endif /* _ASM_ARC_PGALLOC_H */
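To show how the remaining helpers compose, a small sketch of wiring a kernel pte table under a pmd; pte_alloc_one_kernel() is supplied by the asm-generic/pgalloc.h now included above, and demo_wire_pte_table() is an illustrative name only:

static int demo_wire_pte_table(struct mm_struct *mm, pmd_t *pmd)
{
	pte_t *pte = pte_alloc_one_kernel(mm);	/* zeroed, one page long */

	if (!pte)
		return -ENOMEM;

	/* stores the table's address into the pmd via set_pmd() */
	pmd_populate_kernel(mm, pmd, pte);
	return 0;
}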
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
new file mode 100644
index 000000000000..183d23bc1e00
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
+ * These correspond to the respective bits in the TLB entry
+ */
+
+#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
+#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
+
+#ifdef CONFIG_ARC_CACHE_PAGES
+#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
+#else
+#define _PAGE_CACHEABLE 0
+#endif
+
+#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
+#define _PAGE_WRITE (1 << 2) /* User Write (H) */
+#define _PAGE_READ (1 << 3) /* User Read (H) */
+#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
+#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
+#define _PAGE_SPECIAL (1 << 6)
+#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
+#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
+#else
+#define _PAGE_HW_SZ 0
+#endif
+
+/* Defaults for every user page */
+#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_SPECIAL)
+
+/* More Abbreviated helpers */
+#define PAGE_U_NONE __pgprot(___DEF)
+#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
+#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
+#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
+#define PAGE_U_X_W_R __pgprot(___DEF \
+ | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
+ | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+
+#define PAGE_SHARED PAGE_U_W_R
+
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
+
+/*
+ * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
+ *
+ * Certain cases have 1:1 mapping
+ * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
+ * which directly corresponds to PAGE_U_X_R
+ *
+ * Other rules which cause the divergence from 1:1 mapping
+ *
+ * 1. Although ARC700 can do exclusive execute/write protection (meaning R
+ * can be tracked independently of X/W unlike some other CPUs), still to
+ * keep things consistent with other archs:
+ * -Write implies Read: W => R
+ * -Execute implies Read: X => R
+ *
+ * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
+ * This is to enable COW mechanism
+ */
+ /* xwr */
+#define __P000 PAGE_U_NONE
+#define __P001 PAGE_U_R
+#define __P010 PAGE_U_R /* Pvt-W => !W */
+#define __P011 PAGE_U_R /* Pvt-W => !W */
+#define __P100 PAGE_U_X_R /* X => R */
+#define __P101 PAGE_U_X_R
+#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
+#define __P111 PAGE_U_X_R /* Pvt-W => !W */
+
+#define __S000 PAGE_U_NONE
+#define __S001 PAGE_U_R
+#define __S010 PAGE_U_W_R /* W => R */
+#define __S011 PAGE_U_W_R
+#define __S100 PAGE_U_X_R /* X => R */
+#define __S101 PAGE_U_X_R
+#define __S110 PAGE_U_X_W_R /* X => R */
+#define __S111 PAGE_U_X_W_R
+
+#ifndef __ASSEMBLY__
+
+#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
+#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
+#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
+#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
+
+#define PTE_BIT_FUNC(fn, op) \
+ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+
+PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
+PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
+PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval)
+{
+ set_pte(ptep, pteval);
+}
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep);
+
+/* Encode swap {type,off} tuple into PTE
+ * The low 13 bits are reserved: 5 for @type, with bits 12-5 kept zero, so
+ * that _PAGE_PRESENT is zero in a PTE holding a swap "identifier"
+ */
+#define __swp_entry(type, off) ((swp_entry_t) \
+ { ((type) & 0x1f) | ((off) << 13) })
+
+/* Decode a PTE containing a swap "identifier" into constituents */
+#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
+#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#define kern_addr_valid(addr) (1)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
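A worked example of the swap encoding above, with arbitrarily chosen values: the type lands in bits 4:0, the offset starts at bit 13, and bits 12:5 (including _PAGE_PRESENT at bit 9) stay zero, so a swap PTE can never be mistaken for a valid translation (demo_swp_encoding() is illustrative only):

static void demo_swp_encoding(void)
{
	swp_entry_t e = __swp_entry(2, 0x1234);	/* type 2, offset 0x1234 */

	BUG_ON(__swp_type(e) != 2);
	BUG_ON(__swp_offset(e) != 0x1234);

	/* bits 12:5 of the pte are zero, so pte_present() is false */
	BUG_ON(pte_present(__swp_entry_to_pte(e)));
}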
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
new file mode 100644
index 000000000000..64ca25d199be
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * Helpers for implementing paging levels
+ */
+
+#ifndef _ASM_ARC_PGTABLE_LEVELS_H
+#define _ASM_ARC_PGTABLE_LEVELS_H
+
+#if CONFIG_PGTABLE_LEVELS == 2
+
+/*
+ * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
+ *
+ * [31] 32 bit virtual address [0]
+ * -------------------------------------------------------
+ * | | <---------- PGDIR_SHIFT ----------> |
+ * | | | <-- PAGE_SHIFT --> |
+ * -------------------------------------------------------
+ * | | |
+ * | | --> off in page frame
+ * | ---> index into Page Table
+ * ----> index into Page Directory
+ *
+ * Given the software walk, the vaddr split is arbitrarily set to 11:8:13.
+ * However, enabling super pages in a 2 level regime pegs PGDIR_SHIFT to the
+ * super page size.
+ */
+
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT 24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT 21
+#else
+/*
+ * No Super page case
+ * Default value provides 11:8:13 (8K), 10:10:12 (4K)
+ * Limits are imposed by pgtable_t being only PAGE_SIZE long
+ * (so a 4K page can only hold 1K entries, i.e. 10 bits)
+ */
+#ifdef CONFIG_ARC_PAGE_SIZE_4K
+#define PGDIR_SHIFT 22
+#else
+#define PGDIR_SHIFT 21
+#endif
+
+#endif
+
+#else /* CONFIG_PGTABLE_LEVELS != 2 */
+
+/*
+ * A default 3 level paging testing setup in software walked MMU
+ * MMUv4 (8K page): <4> : <7> : <8> : <13>
+ * A default 4 level paging testing setup in software walked MMU
+ * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
+ */
+#define PGDIR_SHIFT 28
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT 25
+#endif
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT 21
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS */
+
+#define PGDIR_SIZE BIT(PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SIZE BIT(PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SIZE BIT(PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE - 1))
+#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
+#endif
+
+#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/pgtable-nop4d.h>
+#elif CONFIG_PGTABLE_LEVELS > 2
+#include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nopmd.h>
+#endif
+
+/*
+ * 1st level paging: pgd
+ */
+#define pgd_ERROR(e) \
+ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+/* In 4 level paging, p4d_* macros work on pgd */
+#define p4d_none(x) (!p4d_val(x))
+#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
+#define p4d_present(x) (p4d_val(x))
+#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
+#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
+#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
+#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
+
+/*
+ * 2nd level paging: pud
+ */
+#define pud_ERROR(e) \
+ pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+/*
+ * In 3 level paging, pud_* macros work on pgd
+ * In 4 level paging, pud_* macros work on pud
+ */
+#define pud_none(x) (!pud_val(x))
+#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
+#define pud_present(x) (pud_val(x))
+#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
+#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
+#define pud_page(pud) virt_to_page(pud_pgtable(pud))
+#define set_pud(pudp, pud) (*(pudp) = pud)
+
+/*
+ * 3rd level paging: pmd
+ */
+#define pmd_ERROR(e) \
+ pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+
+#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+
+#endif
+
+/*
+ * Due to the strange way generic pgtable level folding works, the pmd_* macros
+ * - are valid even for 2 levels (which supposedly only has pgd - pte)
+ * - behave differently for 2 vs. 3
+ * In 2 level paging (pgd -> pte), pmd_* macros work on pgd
+ * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
+ */
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
+#define pmd_present(x) (pmd_val(x))
+#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
+#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
+#define pmd_pfn(pmd) ((pmd_val(pmd) & PAGE_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
+#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
+#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
+
+/*
+ * 4th level paging: pte
+ */
+#define pte_ERROR(e) \
+ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+
+#define pte_none(x) (!pte_val(x))
+#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
+#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+#define set_pte(ptep, pte) ((*(ptep)) = (pte))
+#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
+#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
+
+#ifdef CONFIG_ISA_ARCV2
+#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
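Worked numbers for the default 2-level, 8K page configuration (PAGE_SHIFT 13, PGDIR_SHIFT 21): PTRS_PER_PGD = 1 << (32 - 21) = 2048 and PTRS_PER_PTE = 1 << (21 - 13) = 256, i.e. the 11:8:13 split described above. A small sketch of how a virtual address decomposes with these macros (demo_vaddr_split() is illustrative only; the pgd and pte index masks hold for any of the level configurations):

static inline void demo_vaddr_split(unsigned long vaddr,
				    unsigned long *pgd_idx,
				    unsigned long *pte_idx,
				    unsigned long *off)
{
	*pgd_idx = (vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	*pte_idx = (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
	*off     = vaddr & ~PAGE_MASK;
}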
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index f1ed17edb085..4cf45a99fd79 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -1,224 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * vineetg: May 2011
- * -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
- * They are semantically the same although in different contexts
- * VALID marks a TLB entry exists and it will only happen if PRESENT
- * - Utilise some unused free bits to confine PTE flags to 12 bits
- * This is a must for 4k pg-sz
- *
- * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
- * -TLB Locking never really existed, except for initial specs
- * -SILENT_xxx not needed for our port
- * -Per my request, MMU V3 changes the layout of some of the bits
- * to avoid a few shifts in TLB Miss handlers.
- *
- * vineetg: April 2010
- * -PGD entry no longer contains any flags. If empty it is 0, otherwise has
- * Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
- *
- * vineetg: April 2010
- * -Switched form 8:11:13 split for page table lookup to 11:8:13
- * -this speeds up page table allocation itself as we now have to memset 1K
- * instead of 8k per page table.
- * -TODO: Right now page table alloc is 8K and rest 7K is unused
- * need to optimise it
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
#ifndef _ASM_ARC_PGTABLE_H
#define _ASM_ARC_PGTABLE_H
#include <linux/bits.h>
-#include <asm-generic/pgtable-nopmd.h>
-#include <asm/page.h>
-#include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */
-
-/**************************************************************************
- * Page Table Flags
- *
- * ARC700 MMU only deals with softare managed TLB entries.
- * Page Tables are purely for Linux VM's consumption and the bits below are
- * suited to that (uniqueness). Hence some are not implemented in the TLB and
- * some have different value in TLB.
- * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
- * seperate PD0 and PD1, which combined forms a translation entry)
- * while for PTE perspective, they are 8 and 9 respectively
- * with MMU v3: Most bits (except SHARED) represent the exact hardware pos
- * (saves some bit shift ops in TLB Miss hdlrs)
- */
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-
-#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */
-#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */
-#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */
-#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */
-#define _PAGE_READ (1<<5) /* Page has user read perm (H) */
-#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL (1<<7)
-#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
-#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */
-
-#else /* MMU v3 onwards */
-
-#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */
-#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */
-#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */
-#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
-#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
-#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL (1<<6)
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
-#endif
-
-#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
-#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */
-#endif
-
-#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
- usable for shared TLB entries (H) */
-
-#define _PAGE_UNUSED_BIT (1<<12)
-#endif
-
-/* vmalloc permissions */
-#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
- _PAGE_GLOBAL | _PAGE_PRESENT)
-
-#ifndef CONFIG_ARC_CACHE_PAGES
-#undef _PAGE_CACHEABLE
-#define _PAGE_CACHEABLE 0
-#endif
-
-#ifndef _PAGE_HW_SZ
-#define _PAGE_HW_SZ 0
-#endif
-
-/* Defaults for every user page */
-#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
-
-/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
-/* More Abbrevaited helpers */
-#define PAGE_U_NONE __pgprot(___DEF)
-#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
-#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
-#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
-#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
- _PAGE_EXECUTE)
-
-#define PAGE_SHARED PAGE_U_W_R
-
-/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
- * user vaddr space - visible in all addr spaces, but kernel mode only
- * Thus Global, all-kernel-access, no-user-access, cached
- */
-#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
-
-/* ioremap */
-#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
-
-/* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
-#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
-
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#else
-#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
-#endif
-
-/**************************************************************************
- * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
- *
- * Certain cases have 1:1 mapping
- * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
- * which directly corresponds to PAGE_U_X_R
- *
- * Other rules which cause the divergence from 1:1 mapping
- *
- * 1. Although ARC700 can do exclusive execute/write protection (meaning R
- * can be tracked independet of X/W unlike some other CPUs), still to
- * keep things consistent with other archs:
- * -Write implies Read: W => R
- * -Execute implies Read: X => R
- *
- * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
- * This is to enable COW mechanism
- */
- /* xwr */
-#define __P000 PAGE_U_NONE
-#define __P001 PAGE_U_R
-#define __P010 PAGE_U_R /* Pvt-W => !W */
-#define __P011 PAGE_U_R /* Pvt-W => !W */
-#define __P100 PAGE_U_X_R /* X => R */
-#define __P101 PAGE_U_X_R
-#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
-#define __P111 PAGE_U_X_R /* Pvt-W => !W */
-
-#define __S000 PAGE_U_NONE
-#define __S001 PAGE_U_R
-#define __S010 PAGE_U_W_R /* W => R */
-#define __S011 PAGE_U_W_R
-#define __S100 PAGE_U_X_R /* X => R */
-#define __S101 PAGE_U_X_R
-#define __S110 PAGE_U_X_W_R /* X => R */
-#define __S111 PAGE_U_X_W_R
-
-/****************************************************************
- * 2 tier (PGD:PTE) software page walker
- *
- * [31] 32 bit virtual address [0]
- * -------------------------------------------------------
- * | | <------------ PGDIR_SHIFT ----------> |
- * | | |
- * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> |
- * -------------------------------------------------------
- * | | |
- * | | --> off in page frame
- * | ---> index into Page Table
- * ----> index into Page Directory
- *
- * In a single page size configuration, only PAGE_SHIFT is fixed
- * So both PGD and PTE sizing can be tweaked
- * e.g. 8K page (PAGE_SHIFT 13) can have
- * - PGDIR_SHIFT 21 -> 11:8:13 address split
- * - PGDIR_SHIFT 24 -> 8:11:13 address split
- *
- * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
- * so the sizing flexibility is gone.
- */
-
-#if defined(CONFIG_ARC_HUGEPAGE_16M)
-#define PGDIR_SHIFT 24
-#elif defined(CONFIG_ARC_HUGEPAGE_2M)
-#define PGDIR_SHIFT 21
-#else
-/*
- * Only Normal page support so "hackable" (see comment above)
- * Default value provides 11:8:13 (8K), 11:9:12 (4K)
- */
-#define PGDIR_SHIFT 21
-#endif
-
-#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT)
-#define BITS_FOR_PGD (32 - PGDIR_SHIFT)
-
-#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PTE BIT(BITS_FOR_PTE)
-#define PTRS_PER_PGD BIT(BITS_FOR_PGD)
+#include <asm/pgtable-levels.h>
+#include <asm/pgtable-bits-arcv2.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
/*
* Number of entries a user land program use.
@@ -226,147 +19,12 @@
*/
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-/*
- * No special requirements for lowest virtual address we permit any user space
- * mapping to be mapped at.
- */
-#define FIRST_USER_ADDRESS 0UL
-
-
-/****************************************************************
- * Bucket load of VM Helpers
- */
-
#ifndef __ASSEMBLY__
-#define pte_ERROR(e) \
- pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pgd_ERROR(e) \
- pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* the zero page used for uninitialized and anonymous pages */
extern char empty_zero_page[PAGE_SIZE];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
-
-/* find the page descriptor of the Page Tbl ref by PMD entry */
-#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK)
-
-/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
-
-/* In a 2 level sys, setup the PGD entry with PTE value */
-static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
-{
- pmd_val(*pmdp) = (unsigned long)ptep;
-}
-
-#define pte_none(x) (!pte_val(x))
-#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0))
-
-#define pmd_none(x) (!pmd_val(x))
-#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
-#define pmd_present(x) (pmd_val(x))
-#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
-#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
-
-#define pte_page(pte) pfn_to_page(pte_pfn(pte))
-#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
-
-/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
-#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
-
-/* Zoo of pte_xxx function */
-#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
-#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
-
-#define PTE_BIT_FUNC(fn, op) \
- static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
-PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
-PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
-PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
-PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
-PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
-PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-/* Macro to mark a page protection as uncacheable */
-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- set_pte(ptep, pteval);
-}
-
-/*
- * Macro to quickly access the PGD entry, utlising the fact that some
- * arch may cache the pointer to Page Directory of "current" task
- * in a MMU register
- *
- * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
- * becomes read a register
- *
- * ********CAUTION*******:
- * Kernel code might be dealing with some mm_struct of NON "current"
- * Thus use this macro only when you are certain that "current" is current
- * e.g. when dealing with signal frame setup code etc
- */
-#ifdef ARC_USE_SCRATCH_REG
-#define pgd_offset_fast(mm, addr) \
-({ \
- pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \
- pgd_base + pgd_index(addr); \
-})
-#else
-#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr)
-#endif
-
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep);
-
-/* Encode swap {type,off} tuple into PTE
- * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * PAGE_PRESENT is zero in a PTE holding swap "identifier"
- */
-#define __swp_entry(type, off) ((swp_entry_t) { \
- ((type) & 0x1f) | ((off) << 13) })
-
-/* Decode a PTE containing swap "identifier "into constituents */
-#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
-
-/* NOPs, to keep generic kernel happy */
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
-#define kern_addr_valid(addr) (1)
-
-/*
- * remap a physical page `pfn' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#include <asm/hugepage.h>
-#endif
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 0fcea5bad343..54db9d7bb562 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -17,13 +17,6 @@
#include <asm/dsp.h>
#include <asm/fpu.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-struct eznps_dp {
- unsigned int eflags;
- unsigned int gpa1;
-};
-#endif
-
/* Arch specific stuff which needs to be saved per task.
* However these items are not so important so as to earn a place in
* struct thread_info
@@ -38,9 +31,6 @@ struct thread_struct {
#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
struct arc_fpu fpu;
#endif
-#ifdef CONFIG_ARC_PLAT_EZNPS
- struct eznps_dp dp;
-#endif
};
#define INIT_THREAD { \
@@ -60,17 +50,8 @@ struct task_struct;
* A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
* get optimised away by gcc
*/
-#ifndef CONFIG_EZNPS_MTM_EXT
-
#define cpu_relax() barrier()
-#else
-
-#define cpu_relax() \
- __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-
-#endif
-
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
@@ -89,7 +70,7 @@ struct task_struct;
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
-extern unsigned int get_wchan(struct task_struct *p);
+extern unsigned int __get_wchan(struct task_struct *p);
#endif /* !__ASSEMBLY__ */
@@ -112,31 +93,13 @@ extern unsigned int get_wchan(struct task_struct *p);
#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
-#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE)
-#ifdef CONFIG_ARC_PLAT_EZNPS
-/* NPS architecture defines special window of 129M in user address space for
- * special memory areas, when accessing this window the MMU do not use TLB.
- * Instead MMU direct the access to:
- * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
- * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
- *
- * CMEM - is the fastest memory we got and its size is 16K.
- * FMT - is used to map either to internal/external memory.
- * Internal memory is the second fast memory and its size is 16M
- * External memory is the biggest memory (16G) and also the slowest.
- *
- * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000.
- */
-#define STACK_TOP 0x57e00000
-#else
#define STACK_TOP TASK_SIZE
-#endif
-
#define STACK_TOP_MAX STACK_TOP
/* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 2fdb87addadc..5869a74c0db2 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -8,6 +8,7 @@
#define __ASM_ARC_PTRACE_H
#include <uapi/asm/ptrace.h>
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
@@ -16,11 +17,6 @@
#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
-#ifdef CONFIG_ARC_PLAT_EZNPS
- unsigned long eflags; /* Extended FLAGS */
- unsigned long gpa1; /* General Purpose Aux */
-#endif
-
/* Real registers */
unsigned long bta; /* bta_l1, bta_l2, erbta */
@@ -59,6 +55,9 @@ struct pt_regs {
unsigned long user_r25;
};
+
+#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
+
#else
struct pt_regs {
@@ -107,6 +106,8 @@ struct pt_regs {
unsigned long status32;
};
+#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
+
#endif
/* Callee saved registers - need to be saved only when you are scheduled out */
@@ -154,6 +155,32 @@ static inline long regs_return_value(struct pt_regs *regs)
return (long)regs->r0;
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n);
+
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PTRACE_H */
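A quick illustration of the new register accessors, e.g. from a probe or tracing handler; offsetof(struct pt_regs, sp) stays within MAX_REG_OFFSET for both register-file layouts, and demo_dump_sp() is an illustrative name only:

static void demo_dump_sp(struct pt_regs *regs)
{
	unsigned long a = kernel_stack_pointer(regs);
	unsigned long b = regs_get_register(regs, offsetof(struct pt_regs, sp));

	WARN_ON(a != b);	/* both read the saved stack pointer */
}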
diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h
deleted file mode 100644
index 6a2a5be5026d..000000000000
--- a/arch/arc/include/asm/segment.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASMARC_SEGMENT_H
-#define __ASMARC_SEGMENT_H
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long mm_segment_t;
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(0)
-#define USER_DS MAKE_MM_SEG(TASK_SIZE)
-
-#define segment_eq(a, b) ((a) == (b))
-
-#endif /* __ASSEMBLY__ */
-#endif /* __ASMARC_SEGMENT_H */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 61a97fe70b86..028a8cf76206 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -2,18 +2,14 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
-#ifndef __ASMARC_SETUP_H
-#define __ASMARC_SETUP_H
+#ifndef __ASM_ARC_SETUP_H
+#define __ASM_ARC_SETUP_H
#include <linux/types.h>
#include <uapi/asm/setup.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#define COMMAND_LINE_SIZE 2048
-#else
#define COMMAND_LINE_SIZE 256
-#endif
/*
* Data structure to map a ID to string
@@ -38,4 +34,12 @@ long __init arc_get_mem_sz(void);
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
+extern void arc_mmu_init(void);
+extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_mmu_bcr(void);
+
+extern void arc_cache_init(void);
+extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_cache_bcr(void);
+
#endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index c5de4008d19f..d856491606ac 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
#include <asm/spinlock.h>
extern arch_spinlock_t smp_atomic_ops_lock;
-extern arch_spinlock_t smp_bitops_lock;
#define atomic_ops_lock(flags) do { \
local_irq_save(flags); \
@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
local_irq_restore(flags); \
} while (0)
-#define bitops_lock(flags) do { \
- local_irq_save(flags); \
- arch_spin_lock(&smp_bitops_lock); \
-} while (0)
-
-#define bitops_unlock(flags) do { \
- arch_spin_unlock(&smp_bitops_lock); \
- local_irq_restore(flags); \
-} while (0)
-
#else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags)
-#define bitops_lock(flags) local_irq_save(flags)
-#define bitops_unlock(flags) local_irq_restore(flags)
-
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_ARC_HAS_LLSC */
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 94bbed88e3fc..192871608925 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -232,15 +232,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: ex %0, [%1] \n"
-#ifdef CONFIG_EZNPS_MTM_EXT
- " .word %3 \n"
-#endif
" breq %0, %2, 1b \n"
: "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
-#ifdef CONFIG_EZNPS_MTM_EXT
- , "i"(CTOP_INST_SCHD_RW)
-#endif
: "memory");
smp_mb();
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 4a3d67989d19..1f85de8288b1 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -12,19 +12,10 @@
#include <asm/dsp-impl.h>
#include <asm/fpu.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_EZNPS_DP_PREV(p, n) dp_save_restore(p, n)
-#else
-#define ARC_EZNPS_DP_PREV(p, n)
-
-#endif /* !CONFIG_ARC_PLAT_EZNPS */
-
struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
#define switch_to(prev, next, last) \
do { \
- ARC_EZNPS_DP_PREV(prev, next); \
dsp_save_restore(prev, next); \
fpu_save_restore(prev, next); \
last = __switch_to(prev, next);\
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 94529e89dff0..9709256e31c8 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -12,6 +12,8 @@
#include <asm/unistd.h>
#include <asm/ptrace.h> /* in_syscall() */
+extern void *sys_call_table[];
+
static inline long
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index f9eef0e8f0b7..6ba7fe417095 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -27,7 +27,6 @@
#ifndef __ASSEMBLY__
#include <linux/thread_info.h>
-#include <asm/segment.h>
/*
* low level task data that entry.S needs immediate access to
@@ -40,7 +39,6 @@ struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => BUG */
struct task_struct *task; /* main task structure */
- mm_segment_t addr_limit; /* thread address space */
__u32 cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
};
@@ -56,7 +54,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
}
static inline __attribute_const__ struct thread_info *current_thread_info(void)
@@ -79,26 +76,31 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
-
/* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 16
+#define TIF_SYSCALL_TRACEPOINT 17 /* syscall tracepoint instrumentation */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
#define _TIF_MEMDIE (1<<TIF_MEMDIE)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL)
+
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
/*
* _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
- * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a
- * syscall, so all that reamins to be tested is _TIF_WORK_MASK
+ * SYSCALL_TRACE is anyway separately/unconditionally tested right after a
+ * syscall, so all that remains to be tested is _TIF_WORK_MASK
*/
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
deleted file mode 100644
index a3083b36f5f4..000000000000
--- a/arch/arc/include/asm/tlb-mmu1.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASM_TLB_MMU_V1_H__
-#define __ASM_TLB_MMU_V1_H__
-
-#include <asm/mmu.h>
-
-#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
-
-.macro TLB_WRITE_HEURISTICS
-
-#define JH_HACK1
-#undef JH_HACK2
-#undef JH_HACK3
-
-#ifdef JH_HACK3
-; Calculate set index for 2-way MMU
-; -avoiding use of GetIndex from MMU
-; and its unpleasant LFSR pseudo-random sequence
-;
-; r1 = TLBPD0 from TLB_RELOAD above
-;
-; -- jh_ex_way_set not cleared on startup
-; didn't want to change setup.c
-; hence extra instruction to clean
-;
-; -- should be in cache since in same line
-; as r0/r1 saves above
-;
-ld r0,[jh_ex_way_sel] ; victim pointer
-and r0,r0,1 ; clean
-xor.f r0,r0,1 ; flip
-st r0,[jh_ex_way_sel] ; store back
-asr r0,r1,12 ; get set # <<1, note bit 12=R=0
-or.nz r0,r0,1 ; set way bit
-and r0,r0,0xff ; clean
-sr r0,[ARC_REG_TLBINDEX]
-#endif
-
-#ifdef JH_HACK2
-; JH hack #2
-; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
-; Slower in thrash case (where it matters) because more code is executed
-; Inefficient due to two-register paradigm of this miss handler
-;
-/* r1 = data TLBPD0 at this point */
-lr r0,[eret] /* instruction address */
-xor r0,r0,r1 /* compare set # */
-and.f r0,r0,0x000fe000 /* 2-way MMU mask */
-bne 88f /* not in same set - no need to probe */
-
-lr r0,[eret] /* instruction address */
-and r0,r0,PAGE_MASK /* VPN of instruction address */
-; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
-and r1,r1,0xff /* Data ASID */
-or r0,r0,r1 /* Instruction address + Data ASID */
-
-lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
-sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
-sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
-sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
-
-xor r0,r0,1 /* flip bottom bit of data index */
-b.d 89f
-sr r0,[ARC_REG_TLBINDEX] /* and put it back */
-88:
-sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
-89:
-#endif
-
-#ifdef JH_HACK1
-;
-; Always checks whether instruction will be kicked out by dtlb miss
-;
-mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
-lr r0,[eret] /* instruction address */
-and r0,r0,PAGE_MASK /* VPN of instruction address */
-bmsk r1,r3,7 /* Data ASID, bits 7-0 */
-or_s r0,r0,r1 /* Instruction address + Data ASID */
-
-sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
-sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
-sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
-
-sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
-lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
-cmp r0,r1 /* if no match on indices, go around */
-xor.eq r1,r1,1 /* flip bottom bit of data index */
-sr r1,[ARC_REG_TLBINDEX] /* and put it back */
-#endif
-
-.endm
-
-#endif
-
-#endif
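
The deleted tlb-mmu1.h carried hand-written replacement heuristics for the 2-way MMU v1 TLB. For readers skimming the removal, a conceptual C rendering of what JH_HACK1 did; tlb_probe() and tlb_get_victim_index() are hypothetical stand-ins for the TLBProbe/TLBGetIndex commands, so this is an illustration of the removed logic, not kernel code:

/* On a data-TLB miss, avoid evicting the TLB entry that maps the
 * faulting instruction itself: probe for the instruction's entry and,
 * if the MMU's proposed victim index collides with it, pick the other
 * way of the 2-way set. */
static unsigned int pick_dtlb_victim(unsigned long insn_vaddr,
				     unsigned long data_pd0,
				     unsigned long page_mask)
{
	/* Instruction VPN combined with the data access' ASID (low 8 bits) */
	unsigned long insn_pd0 = (insn_vaddr & page_mask) | (data_pd0 & 0xff);

	int insn_idx   = tlb_probe(insn_pd0);            /* TLBProbe    */
	int victim_idx = tlb_get_victim_index(data_pd0); /* TLBGetIndex */

	if (insn_idx == victim_idx)
		victim_idx ^= 1;	/* flip the way bit within the set */

	return victim_idx;
}
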
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index ea40ec7f6cae..99712471c96a 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -23,35 +23,6 @@
#include <linux/string.h> /* for generic string functions */
-
-#define __kernel_ok (uaccess_kernel())
-
-/*
- * Algorithmically, for __user_ok() we want do:
- * (start < TASK_SIZE) && (start+len < TASK_SIZE)
- * where TASK_SIZE could either be retrieved from thread_info->addr_limit or
- * emitted directly in code.
- *
- * This can however be rewritten as follows:
- * (len <= TASK_SIZE) && (start+len < TASK_SIZE)
- *
- * Because it essentially checks if buffer end is within limit and @len is
- * non-ngeative, which implies that buffer start will be within limit too.
- *
- * The reason for rewriting being, for majority of cases, @len is generally
- * compile time constant, causing first sub-expression to be compile time
- * subsumed.
- *
- * The second part would generate weird large LIMMs e.g. (0x6000_0000 - 0x10),
- * so we check for TASK_SIZE using get_fs() since the addr_limit load from mem
- * would already have been done at this call site for __kernel_ok()
- *
- */
-#define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \
- ((addr) <= (get_fs() - (sz))))
-#define __access_ok(addr, sz) (unlikely(__kernel_ok) || \
- likely(__user_ok((addr), (sz))))
-
/*********** Single byte/hword/word copies ******************/
#define __get_user_fn(sz, u, k) \
@@ -655,91 +626,18 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
return res;
}
-static inline long
-__arc_strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = 0;
- char val;
-
- if (count == 0)
- return 0;
-
- __asm__ __volatile__(
- " mov lp_count, %5 \n"
- " lp 3f \n"
- "1: ldb.ab %3, [%2, 1] \n"
- " breq.d %3, 0, 3f \n"
- " stb.ab %3, [%1, 1] \n"
- " add %0, %0, 1 # Num of NON NULL bytes copied \n"
- "3: \n"
- " .section .fixup, \"ax\" \n"
- " .align 4 \n"
- "4: mov %0, %4 # sets @res as -EFAULT \n"
- " j 3b \n"
- " .previous \n"
- " .section __ex_table, \"a\" \n"
- " .align 4 \n"
- " .word 1b, 4b \n"
- " .previous \n"
- : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
- : "g"(-EFAULT), "r"(count)
- : "lp_count", "memory");
-
- return res;
-}
-
-static inline long __arc_strnlen_user(const char __user *s, long n)
-{
- long res, tmp1, cnt;
- char val;
-
- __asm__ __volatile__(
- " mov %2, %1 \n"
- "1: ldb.ab %3, [%0, 1] \n"
- " breq.d %3, 0, 2f \n"
- " sub.f %2, %2, 1 \n"
- " bnz 1b \n"
- " sub %2, %2, 1 \n"
- "2: sub %0, %1, %2 \n"
- "3: ;nop \n"
- " .section .fixup, \"ax\" \n"
- " .align 4 \n"
- "4: mov %0, 0 \n"
- " j 3b \n"
- " .previous \n"
- " .section __ex_table, \"a\" \n"
- " .align 4 \n"
- " .word 1b, 4b \n"
- " .previous \n"
- : "=r"(res), "=r"(tmp1), "=r"(cnt), "=r"(val)
- : "0"(s), "1"(n)
- : "memory");
-
- return res;
-}
-
#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
#define __clear_user(d, n) __arc_clear_user(d, n)
-#define __strncpy_from_user(d, s, n) __arc_strncpy_from_user(d, s, n)
-#define __strnlen_user(s, n) __arc_strnlen_user(s, n)
#else
extern unsigned long arc_clear_user_noinline(void __user *to,
unsigned long n);
-extern long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
- long count);
-extern long arc_strnlen_user_noinline(const char __user *src, long n);
-
#define __clear_user(d, n) arc_clear_user_noinline(d, n)
-#define __strncpy_from_user(d, s, n) arc_strncpy_from_user_noinline(d, s, n)
-#define __strnlen_user(s, n) arc_strnlen_user_noinline(s, n)
-
#endif
-#include <asm/segment.h>
#include <asm-generic/uaccess.h>
#endif
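
With set_fs()/addr_limit gone, uaccess.h drops the hand-rolled __access_ok()/__user_ok() and the inline-asm strncpy_from_user/strnlen_user variants in favour of the asm-generic versions. The deleted comment's range-check rewrite is worth a standalone illustration, since essentially the same form is what generic access_ok() uses: checking "len <= LIMIT && addr <= LIMIT - len" needs no addition (so no wraparound) and lets a compile-time-constant len fold the first term away. A compilable sketch, with LIMIT standing in for the old TASK_SIZE/get_fs() value:

#include <assert.h>

#define LIMIT 0x60000000UL	/* stand-in for the old user address limit */

static int range_ok(unsigned long addr, unsigned long len)
{
	/* same shape as the deleted __user_ok() macro */
	return (len <= LIMIT) && (addr <= LIMIT - len);
}

int main(void)
{
	assert(range_ok(0x1000, 0x100));       /* well inside the limit      */
	assert(range_ok(LIMIT - 0x10, 0x10));  /* ends exactly at the limit  */
	assert(!range_ok(LIMIT - 0x10, 0x20)); /* crosses the limit          */
	assert(!range_ok(~0UL, 0x10));         /* addr + len would wrap      */
	return 0;
}
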
diff --git a/arch/arc/include/uapi/asm/bpf_perf_event.h b/arch/arc/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..6cb1c2823288
--- /dev/null
+++ b/arch/arc/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
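
The new UAPI header only types bpf_user_pt_regs_t; it is consumed by the arch-neutral perf-event BPF context. Roughly (paraphrased from include/uapi/linux/bpf_perf_event.h, not part of this patch), that wrapper looks like:

struct bpf_perf_event_data {
	bpf_user_pt_regs_t regs;	/* struct user_regs_struct on ARC */
	__u64 sample_period;
	__u64 addr;
};
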
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a21..2a4ad619abfb 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
-
#endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
index 95f8a4380e11..7a5449dfcb29 100644
--- a/arch/arc/include/uapi/asm/sigcontext.h
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -18,6 +18,7 @@
*/
struct sigcontext {
struct user_regs_struct regs;
+ struct user_regs_arcv2 v2abi;
};
#endif /* _ASM_ARC_SIGCONTEXT_H */
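
The sigcontext growth is transparent to handlers that only look at 'regs', but handlers installed with SA_SIGINFO can reach the whole saved frame. A userspace-side sketch using only the generic POSIX API (nothing here is specific to this patch beyond the comment):

#include <signal.h>
#include <string.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *info, void *uc_void)
{
	ucontext_t *uc = uc_void;

	/* On ARC the generic ucontext embeds the struct sigcontext shown
	 * above as uc_mcontext; after this patch the ARCv2 ABI registers
	 * (the new 'v2abi' member) are saved there as well. */
	(void)sig; (void)info; (void)uc;
}

int install(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	return sigaction(SIGUSR1, &sa, NULL);
}
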