Diffstat (limited to 'platform/linux-generic/arch/aarch64/odp_cpu.h')
-rw-r--r--  platform/linux-generic/arch/aarch64/odp_cpu.h  199
1 file changed, 199 insertions(+), 0 deletions(-)
diff --git a/platform/linux-generic/arch/aarch64/odp_cpu.h b/platform/linux-generic/arch/aarch64/odp_cpu.h
new file mode 100644
index 000000000..cd15cda2d
--- /dev/null
+++ b/platform/linux-generic/arch/aarch64/odp_cpu.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017-2018 Linaro Limited
+ */
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+
+#if !defined(__aarch64__)
+#error Use this file only when compiling for the ARMv8 architecture
+#endif
+
+#include <odp_debug_internal.h>
+#include <odp_types_internal.h>
+
+/*
+ * Use LL/SC (LLD/SCD) atomic primitives instead of the lock-based code path
+ * in llqueue. On ARM, LL/SC is the fastest way to enqueue and dequeue
+ * elements of a linked-list queue.
+ */
+#define CONFIG_LLDSCD
+
+/*
+ * Use DMB;STR instead of STLR on ARM.
+ * On early ARMv8 implementations (e.g. Cortex-A57) this is noticeably more
+ * performant than using a store-release.
+ * It also allows for load-only barriers (DMB ISHLD), which are much cheaper
+ * than a full barrier.
+ */
+#define CONFIG_DMBSTR
+
+/* Only ARMv8 supports DMB ISHLD */
+/* A load-only barrier is much cheaper than a full barrier */
+#define _odp_release_barrier(ro) \
+do { \
+ if (ro) \
+ __asm__ volatile("dmb ishld" ::: "memory"); \
+ else \
+ __asm__ volatile("dmb ish" ::: "memory"); \
+} while (0)
+
+static inline uint16_t ll8(uint8_t *var, int mm)
+{
+ uint16_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxrb %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxrb %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+static inline uint32_t ll32(uint32_t *var, int mm)
+{
+ uint32_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxr %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxr %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxr %w0, %w1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxr %w0, %w1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : );
+ return ret;
+}
+
+static inline uint64_t ll64(uint64_t *var, int mm)
+{
+ uint64_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxr %0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxr %0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc64(uint64_t *var, uint64_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxr %w0, %1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxr %w0, %1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : );
+ return ret;
+}
+
+union i128 {
+ _odp_u128_t i128;
+ int64_t i64[2];
+};
+
+static inline _odp_u128_t lld(_odp_u128_t *var, int mm)
+{
+ union i128 old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxp %0, %1, [%2]"
+ : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxp %0, %1, [%2]"
+ : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+ : "r" (var)
+ : );
+ return old.i128;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxp %w0, %1, %2, [%3]"
+ : "=&r" (ret)
+ : "r" (((*(union i128 *)&neu)).i64[0]),
+ "r" (((*(union i128 *)&neu)).i64[1]),
+ "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxp %w0, %1, %2, [%3]"
+ : "=&r" (ret)
+ : "r" (((*(union i128 *)&neu)).i64[0]),
+ "r" (((*(union i128 *)&neu)).i64[1]),
+ "r" (var)
+ : );
+#pragma GCC diagnostic pop
+ return ret;
+}
+
+#include "odp_atomic.h"
+#include "odp_wait_until.h"
+
+#ifdef __ARM_FEATURE_UNALIGNED
+#define _ODP_UNALIGNED 1
+#else
+#define _ODP_UNALIGNED 0
+#endif
+
+#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H */
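
Usage notes (illustrative sketches, with assumptions labeled inline):

A minimal sketch of how an ll32()/sc32() pair is typically consumed: a retry
loop built on the exclusive load/store. The helper name atomic_fetch_add_u32()
is hypothetical, chosen here for illustration only; the 8-, 64- and 128-bit
variants compose the same way.

    static inline uint32_t atomic_fetch_add_u32(uint32_t *var, uint32_t inc)
    {
        uint32_t old;

        do {
            /* Load-exclusive: starts the LL/SC transaction */
            old = ll32(var, __ATOMIC_RELAXED);
            /* Store-exclusive: returns 1 (failure) if another CPU
             * intervened on the location since the load-exclusive,
             * in which case the loop retries */
        } while (sc32(var, old + inc, __ATOMIC_RELAXED));
        return old;
    }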
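Under CONFIG_DMBSTR a release store is composed from an explicit barrier
followed by a plain store, rather than a single STLR. The ro argument of
_odp_release_barrier() selects the cheaper load-only barrier when only prior
loads need to be ordered, e.g. when releasing a lock that protected read-only
accesses. A sketch of the pattern; store_release_u32() is a hypothetical name:

    static inline void store_release_u32(uint32_t *var, uint32_t val,
                                         int readonly)
    {
        /* DMB ISHLD if only prior loads must complete first,
         * otherwise a full DMB ISH */
        _odp_release_barrier(readonly);
        /* Plain STR; ordering was already provided by the barrier */
        __atomic_store_n(var, val, __ATOMIC_RELAXED);
    }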
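The 128-bit lld()/scd() pair composes into a double-word compare-and-swap,
which is what lock-free linked-list code typically needs for pointer-plus-tag
updates. A sketch assuming _odp_u128_t is a native unsigned __int128 (so the
== comparison is valid); cas_u128() is a hypothetical name, and the CLREX on
the failure path clears the exclusive monitor before giving up:

    static inline int cas_u128(_odp_u128_t *var, _odp_u128_t exp,
                               _odp_u128_t neu)
    {
        _odp_u128_t old;

        do {
            old = lld(var, __ATOMIC_ACQUIRE);
            if (old != exp) {
                /* Clear the exclusive monitor before failing */
                __asm__ volatile("clrex" : : : "memory");
                return 0;  /* mismatch: CAS failed */
            }
        } while (scd(var, neu, __ATOMIC_RELEASE));
        return 1;  /* swap succeeded */
    }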