path: root/platform/linux-generic/arch/aarch64/odp_cpu.h

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 ARM Limited
 * Copyright (c) 2017-2018 Linaro Limited
 */

#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H

#if !defined(__aarch64__)
#error Use this file only when compiling for ARMv8 architecture
#endif

#include <odp_debug_internal.h>
#include <odp_types_internal.h>

/*
 * Use LLD/SCD atomic primitives instead of the lock-based code path in
 * llqueue. On ARM, LLD/SCD is the fastest way to enqueue and dequeue
 * elements from a linked list queue.
 */
#define CONFIG_LLDSCD

/*
 * Use DMB;STR instead of STLR on ARM.
 * On early ARMv8 implementations (e.g. Cortex-A57) this is noticeably faster
 * than using store-release (STLR).
 * It also allows for load-only barriers (DMB ISHLD), which are much cheaper
 * than a full barrier.
 */
#define CONFIG_DMBSTR

/* Only ARMv8 supports DMB ISHLD */
/* A load-only barrier is much cheaper than a full barrier */
#define _odp_release_barrier(ro) \
do {							     \
	if (ro)						     \
		__asm__ volatile("dmb ishld" ::: "memory");  \
	else						     \
		__asm__ volatile("dmb ish" ::: "memory");    \
} while (0)
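
/*
 * Illustrative sketch (not part of the original header; the helper name is
 * hypothetical): with CONFIG_DMBSTR, the DMB;STR pattern replaces a single
 * store-release (STLR) instruction: _odp_release_barrier() followed by a
 * relaxed store. Passing ro=1 selects the cheaper load-only barrier when the
 * preceding critical section performed only loads.
 */
static inline void example_store_release_u32(uint32_t *loc, uint32_t val,
					     int ro)
{
	_odp_release_barrier(ro); /* DMB ISHLD when ro != 0, DMB ISH otherwise */
	__atomic_store_n(loc, val, __ATOMIC_RELAXED);
}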

/*
 * Load-exclusive/store-exclusive wrappers (LDXRB/LDXR/LDXP and friends).
 * An ll*() call reads the location and arms the exclusive monitor; the
 * matching sc*() call stores only while the monitor is still set and
 * returns 1 otherwise, in which case the caller must retry.
 */
static inline uint16_t ll8(uint8_t *var, int mm)
{
	uint16_t old;

	_ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_ACQUIRE)
		__asm__ volatile("ldaxrb %w0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : "memory");
	else
		__asm__ volatile("ldxrb %w0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : );
	return old;
}

static inline uint32_t ll32(uint32_t *var, int mm)
{
	uint32_t old;

	_ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_ACQUIRE)
		__asm__ volatile("ldaxr %w0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : "memory");
	else
		__asm__ volatile("ldxr %w0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : );
	return old;
}

/* Return 0 on success, 1 on failure */
static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm)
{
	uint32_t ret;

	_ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_RELEASE)
		__asm__ volatile("stlxr %w0, %w1, [%2]"
				 : "=&r" (ret)
				 : "r" (neu), "r" (var)
				 : "memory");
	else
		__asm__ volatile("stxr %w0, %w1, [%2]"
				 : "=&r" (ret)
				 : "r" (neu), "r" (var)
				 : );
	return ret;
}
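
/*
 * Illustrative sketch (not part of the original header; the helper name is
 * hypothetical): ll32()/sc32() are paired in a retry loop, repeating the
 * whole read-modify-write whenever the store-exclusive reports failure.
 */
static inline uint32_t example_fetch_add_u32(uint32_t *loc, uint32_t inc)
{
	uint32_t old;

	do {
		old = ll32(loc, __ATOMIC_RELAXED);		/* load-exclusive */
	} while (sc32(loc, old + inc, __ATOMIC_RELAXED));	/* retry on failure */

	return old;
}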

static inline uint64_t ll64(uint64_t *var, int mm)
{
	uint64_t old;

	_ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_ACQUIRE)
		__asm__ volatile("ldaxr %0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : "memory");
	else
		__asm__ volatile("ldxr %0, [%1]"
				 : "=&r" (old)
				 : "r" (var)
				 : );
	return old;
}

/* Return 0 on success, 1 on failure */
static inline uint32_t sc64(uint64_t *var, uint64_t neu, int mm)
{
	uint32_t ret;

	_ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_RELEASE)
		__asm__ volatile("stlxr %w0, %1, [%2]"
				 : "=&r" (ret)
				 : "r" (neu), "r" (var)
				 : "memory");
	else
		__asm__ volatile("stxr %w0, %1, [%2]"
				 : "=&r" (ret)
				 : "r" (neu), "r" (var)
				 : );
	return ret;
}

union i128 {
	_odp_u128_t i128;
	int64_t  i64[2];
};

static inline _odp_u128_t lld(_odp_u128_t *var, int mm)
{
	union i128 old;

	_ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);

	if (mm == __ATOMIC_ACQUIRE)
		__asm__ volatile("ldaxp %0, %1, [%2]"
				 : "=&r" (old.i64[0]), "=&r" (old.i64[1])
				 : "r" (var)
				 : "memory");
	else
		__asm__ volatile("ldxp %0, %1, [%2]"
				 : "=&r" (old.i64[0]), "=&r" (old.i64[1])
				 : "r" (var)
				 : );
	return old.i128;
}

/* Return 0 on success, 1 on failure */
static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm)
{
	uint32_t ret;

	_ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
	if (mm == __ATOMIC_RELEASE)
		__asm__ volatile("stlxp %w0, %1, %2, [%3]"
				 : "=&r" (ret)
				 : "r" (((*(union i128 *)&neu)).i64[0]),
				   "r" (((*(union i128 *)&neu)).i64[1]),
				   "r" (var)
				 : "memory");
	else
		__asm__ volatile("stxp %w0, %1, %2, [%3]"
				 : "=&r" (ret)
				 : "r" (((*(union i128 *)&neu)).i64[0]),
				   "r" (((*(union i128 *)&neu)).i64[1]),
				   "r" (var)
				 : );
#pragma GCC diagnostic pop
	return ret;
}
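
/*
 * Illustrative sketch (not part of the original header; the helper name is
 * hypothetical): lld()/scd() form a 128-bit LL/SC pair, e.g. for swapping a
 * pointer together with a tag/counter in a single atomic step. Returns the
 * previous 128-bit value.
 */
static inline _odp_u128_t example_exchange_u128(_odp_u128_t *var,
						_odp_u128_t neu)
{
	_odp_u128_t old;

	do {
		old = lld(var, __ATOMIC_ACQUIRE);	/* load-exclusive pair */
	} while (scd(var, neu, __ATOMIC_RELEASE));	/* retry if monitor was lost */

	return old;
}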

#include "odp_atomic.h"
#include "odp_wait_until.h"

#ifdef __ARM_FEATURE_UNALIGNED
#define _ODP_UNALIGNED 1
#else
#define _ODP_UNALIGNED 0
#endif

#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H */