blob: 3abe9682f413d686a6c5137f03bc044867b8246b [file] [log] [blame]
Jacob Bramleyd77a8e42019-02-12 16:52:24 +00001// Copyright 2019, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// * Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// * Neither the name of ARM Limited nor the names of its contributors may be
13// used to endorse or promote products derived from this software without
14// specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <sys/mman.h>
28
29#include <cfloat>
30#include <cmath>
31#include <cstdio>
32#include <cstdlib>
33#include <cstring>
34
35#include "test-runner.h"
36#include "test-utils.h"
37#include "aarch64/test-utils-aarch64.h"
38
39#include "aarch64/cpu-aarch64.h"
40#include "aarch64/disasm-aarch64.h"
41#include "aarch64/macro-assembler-aarch64.h"
42#include "aarch64/simulator-aarch64.h"
43#include "test-assembler-aarch64.h"
44
45namespace vixl {
46namespace aarch64 {
47
Jacob Bramley03c0b512019-02-22 16:42:06 +000048// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
49// is optimised for call-site clarity, not generated code quality, so it doesn't
50// exist in the MacroAssembler itself.
51//
52// Usage:
53//
54// int values[] = { 42, 43, 44 };
55// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
56//
57// The rightmost (highest-indexed) array element maps to the lowest-numbered
58// lane.
59template <typename T, size_t N>
60void InsrHelper(MacroAssembler* masm,
61 const ZRegister& zdn,
62 const T (&values)[N]) {
63 for (size_t i = 0; i < N; i++) {
64 masm->Insr(zdn, values[i]);
65 }
66}
67
// Conveniently initialise P registers. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//    int values[] = { 0x0, 0x1, 0x2 };
//    Initialise(&masm, p0.VnS(), values);   // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper bits,
// but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  // There is no 'insr' for P registers. The easiest way to initialise one with
  // an arbitrary value is to load it from a literal pool.

  // Each P-register lane holds one bit per kZRegBitsPerPRegBit bits of the
  // corresponding Z-register lane.
  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);
  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  // For most lane sizes, each value contributes less than a byte. We need to
  // pack them into chunks which we can store directly. It's sensible for the
  // chunk to be the same size as an instruction because we need to pad to an
  // instruction boundary anyway.
  typedef Instr Chunk;
  const size_t kChunkSizeInBits = sizeof(Chunk) * kBitsPerByte;
  VIXL_ASSERT((kPRegMaxSize % kChunkSizeInBits) == 0);
  const size_t kPRegMaxSizeInChunks = kPRegMaxSize / kChunkSizeInBits;

  masm->Adr(temp, &data);
  // TODO: Use `Ldr(pd, MemOperand(temp))` once available.
  masm->Ldr(PRegister(pd.GetCode()), temp);
  // Branch over the inline literal pool so it is never executed.
  masm->B(&done);
  {
    // Reserve exactly one maximum-sized P register's worth of pool space.
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    // Put the last-specified value at the lowest address.
    int values_index = N - 1;
    for (size_t c = 0; c < kPRegMaxSizeInChunks; c++) {
      Chunk chunk = 0;
      // Whilst we still have values left, use them to populate the chunk.
      for (size_t chunk_bit = 0;
           (chunk_bit < kChunkSizeInBits) && (values_index >= 0);
           chunk_bit += p_bits_per_lane) {
        Chunk value = values[values_index] & p_lane_mask;
        // The mask must not discard significant bits of the input value.
        VIXL_ASSERT(static_cast<T>(value) == values[values_index]);
        chunk |= value << chunk_bit;
        values_index--;
      }
      // Emit the packed chunk as raw data. Chunks beyond the specified values
      // (and unused upper lane bits) are zero.
      masm->dc(chunk);
    }
  }
  masm->Bind(&done);
}
132
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000133// Ensure that basic test infrastructure works.
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100134TEST(sve_test_infrastructure_z) {
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000135 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
136 START();
137
Jacob Bramley03c0b512019-02-22 16:42:06 +0000138 __ Mov(x0, 0x0123456789abcdef);
139
140 // Test basic `Insr` behaviour.
141 __ Insr(z0.VnB(), 1);
142 __ Insr(z0.VnB(), 2);
143 __ Insr(z0.VnB(), x0);
144 __ Insr(z0.VnB(), -42);
145 __ Insr(z0.VnB(), 0);
146
147 // Test array inputs.
148 int z1_inputs[] = {3, 4, 5, -42, 0};
149 InsrHelper(&masm, z1.VnH(), z1_inputs);
150
151 // Test that sign-extension works as intended for various lane sizes.
152 __ Dup(z2.VnD(), 0); // Clear the register first.
153 __ Insr(z2.VnB(), -42); // 0xd6
154 __ Insr(z2.VnB(), 0xfe); // 0xfe
155 __ Insr(z2.VnH(), -42); // 0xffd6
156 __ Insr(z2.VnH(), 0xfedc); // 0xfedc
157 __ Insr(z2.VnS(), -42); // 0xffffffd6
158 __ Insr(z2.VnS(), 0xfedcba98); // 0xfedcba98
159 // Use another register for VnD(), so we can support 128-bit Z registers.
160 __ Insr(z3.VnD(), -42); // 0xffffffffffffffd6
161 __ Insr(z3.VnD(), 0xfedcba9876543210); // 0xfedcba9876543210
162
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000163 END();
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000164
Jacob Bramley119bd212019-04-16 10:13:09 +0100165 if (CAN_RUN()) {
Jacob Bramley03c0b512019-02-22 16:42:06 +0000166 if (0) {
167 // TODO: Enable this once we have sufficient simulator support.
168 RUN();
169
170 // Test that array checks work properly on a register initialised
171 // lane-by-lane.
172 int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
173 ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());
174
175 // Test that lane-by-lane checks work properly on a register initialised
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100176 // by array.
Jacob Bramley03c0b512019-02-22 16:42:06 +0000177 for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
178 ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z0.VnH(), i);
179 }
180
181 uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
182 ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
183 uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
184 ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
185 } else {
186 // TODO: This normally happens in 'RUN()', so remove it once we enable the
187 // block above.
188 DISASSEMBLE();
189 }
Jacob Bramley119bd212019-04-16 10:13:09 +0100190 }
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000191
192 TEARDOWN();
193}
194
Jacob Bramley2eaecf12019-05-01 15:46:34 +0100195// Ensure that basic test infrastructure works.
196TEST(sve_test_infrastructure_p) {
197 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
198 START();
199
200 // Simple cases: move boolean (0 or 1) values.
201
202 int p0_inputs[] = {0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
203 Initialise(&masm, p0.VnB(), p0_inputs);
204
205 int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
206 Initialise(&masm, p1.VnH(), p1_inputs);
207
208 int p2_inputs[] = {1, 0, 0, 1};
209 Initialise(&masm, p2.VnS(), p2_inputs);
210
211 int p3_inputs[] = {0, 1};
212 Initialise(&masm, p3.VnD(), p3_inputs);
213
214 // Advanced cases: move numeric value into architecturally-ignored bits.
215
216 // B-sized lanes get one bit in a P register, so there are no ignored bits.
217
218 // H-sized lanes get two bits in a P register.
219 int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
220 Initialise(&masm, p4.VnH(), p4_inputs);
221
222 // S-sized lanes get four bits in a P register.
223 int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
224 Initialise(&masm, p5.VnS(), p5_inputs);
225
226 // D-sized lanes get eight bits in a P register.
227 int p6_inputs[] = {0x81, 0xcc, 0x55};
228 Initialise(&masm, p6.VnD(), p6_inputs);
229
230 // The largest possible P register has 32 bytes.
231 int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
232 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
233 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
234 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
235 Initialise(&masm, p7.VnD(), p7_inputs);
236
237 END();
238
239 if (CAN_RUN()) {
240 if (0) {
241 // TODO: Enable this once we have sufficient simulator support.
242 RUN();
243
244 // Test that lane-by-lane checks work properly.
245 for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
246 ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), i);
247 }
248 for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
249 ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnB(), i);
250 }
251 for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
252 ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnB(), i);
253 }
254 for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
255 ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnB(), i);
256 }
257
258 // Test that array checks work properly on predicates initialised with a
259 // possibly-different lane size.
260 // 0b...11'10'01'00'01'10'11
261 int p4_expected[] = {0x39, 0x1b};
262 ASSERT_EQUAL_SVE(p4_expected, p4.VnD());
263
264 ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());
265
266 // 0b...10000001'11001100'01010101
267 int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
268 ASSERT_EQUAL_SVE(p6_expected, p6.VnH());
269
270 // 0b...10011100'10011101'10011110'10011111
271 int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
272 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
273 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
274
275 } else {
276 // TODO: This normally happens in 'RUN()', so remove it once we enable the
277 // block above.
278 DISASSEMBLE();
279 }
280 }
281}
282
Jacob Bramleyd77a8e42019-02-12 16:52:24 +0000283} // namespace aarch64
284} // namespace vixl