// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `const Test& config` argument, to allow it to query the
// vector length.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                  \
  void Test##name(Test* config);                                       \
  Test* test_##name##_list[] =                                         \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),   \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),   \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                     \
  void Test##name(Test* config);                                           \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,         \
                                           "AARCH64_ASM_" #name "_vlauto", \
                                           &Test##name);                   \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  USE(config)

#endif

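// For orientation, a test declared with these macros follows the pattern
// sketched below. The test name and body here are purely illustrative (not
// part of the suite); they only show how `config`, the SETUP/START/END macros
// and the run-time check fit together.
//
//     TEST_SVE(example) {
//       SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//       START();
//       // ... generate code with `masm` ...
//       END();
//       if (CAN_RUN()) {
//         RUN();
//         // ... ASSERT_EQUAL_SVE checks ...
//       }
//     }
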
// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it
// doesn't exist in the MacroAssembler itself.
//
// Usage:
//
//     int values[] = { 42, 43, 44 };
//     InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The
// destination lane size is ignored. This is optimised for call-site clarity,
// not generated code quality.
//
// Usage:
//
//     Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//     int values[] = { 0x0, 0x1, 0x2 };
//     Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lower bit, and writes zero to the upper bits,
// but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}

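// A concrete illustration of the packing performed above (worked out by hand
// from the lane-size rules): an H-sized lane occupies two predicate bits, so
//
//     int values[] = {0x3, 0x2, 0x1};
//     Initialise(&masm, p0.VnH(), values);
//
// packs the lanes as 0b11'10'01 (0x39), with the rightmost element in the
// lowest-numbered lane and all unspecified bits set to zero.
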
// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);  // Clear the register first.
  __ Insr(z2.VnB(), -42);         // 0xd6
  __ Insr(z2.VnB(), 0xfe);        // 0xfe
  __ Insr(z2.VnH(), -42);         // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);      // 0xfedc
  __ Insr(z2.VnS(), -42);         // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);  // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  // - Write*Register, calling through to SimRegisterBase::Write.
  // - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    // + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    // -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

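    // The mla[] and mls[] values above are just the lane-wise definitions of
    // the operations, computed by hand from the inputs: for each lane,
    // mla[i] = za[i] + (zn[i] * zm[i]) and mls[i] = za[i] - (zn[i] * zm[i]).
    // For example, mla[0] = -39 + (-5 * 9) = -84.
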
    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg
  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

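  // The flag-setting forms (Ands, Bics) also update NZCV with the SVE
  // predicate condition flags: N (First) reflects the first active lane of
  // the result, Z (None) is set if no active result lane is true, and C
  // (!Last) is clear if the last active result lane is true.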
  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

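  // The "wide elements" compare forms compare each lane of the first source
  // with the 64-bit lane of the second source that overlaps it, so the
  // D-sized z19 below supplies one comparison value per 64-bit chunk of z13.
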
  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  // TODO: Enable these once we have Simulator support.
  // __ Dup(z4.VnD(), -42 * 256);
  // __ Dup(z5.VnS(), -0x8000);
  // __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    // TODO: Enable these once we have Simulator support.
    // ASSERT_EQUAL_SVE(0x0000000000003c00, z4.VnD());
    // ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    // ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;
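  // These counts follow from p0_inputs: only the lowest predicate bit of each
  // lane is significant, so B lanes use all 16 bits (9 set), H lanes every
  // second bit (5 set), S lanes every fourth bit (3 set), and D lanes every
  // eighth bit (2 set).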

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB(), x21);

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH(), x22);

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}

TEST_SVE(sve_sqinc_sqdec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation behaves correctly.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Sqdecp(z10.VnD(), p0, z0.VnD());
  __ Sqdecp(z11.VnD(), p0, z1.VnD());
  __ Sqdecp(z12.VnS(), p0, z2.VnS());
  __ Sqdecp(z13.VnH(), p0, z3.VnH());

  __ Sqincp(z14.VnD(), p0, z0.VnD());
  __ Sqincp(z15.VnD(), p0, z1.VnD());
  __ Sqincp(z16.VnS(), p0, z2.VnS());
  __ Sqincp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Sqdecp(z0.VnD(), p0);
  __ Sqdecp(z1.VnD(), p0);
  __ Sqdecp(z2.VnS(), p0);
  __ Sqdecp(z3.VnH(), p0);

  __ Sqincp(z4.VnD(), p0);
  __ Sqincp(z5.VnD(), p0);
  __ Sqincp(z6.VnS(), p0);
  __ Sqincp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

1536TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1537 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1538 START();
1539
1540 // With an all-true predicate, these instructions increment or decrement by
1541 // the vector length.
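 // Starting from zero, each lane simply becomes plus or minus the lane count;
 // with at most a 2048-bit vector that count (at most 128 for H lanes) is far
 // from the signed saturation bounds, so no clamping is expected here.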
1542 __ Ptrue(p15.VnB());
1543
1544 __ Dup(z0.VnD(), 0);
1545 __ Sqdecp(z0.VnD(), p15);
1546
1547 __ Dup(z1.VnS(), 0);
1548 __ Sqdecp(z1.VnS(), p15);
1549
1550 __ Dup(z2.VnH(), 0);
1551 __ Sqdecp(z2.VnH(), p15);
1552
1553 __ Dup(z3.VnD(), 0);
1554 __ Sqincp(z3.VnD(), p15);
1555
1556 __ Dup(z4.VnS(), 0);
1557 __ Sqincp(z4.VnS(), p15);
1558
1559 __ Dup(z5.VnH(), 0);
1560 __ Sqincp(z5.VnH(), p15);
1561
1562 END();
1563 if (CAN_RUN()) {
1564 RUN();
1565
1566 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1567 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1568 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1569
1570 for (int i = 0; i < d_lane_count; i++) {
1571 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1572 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1573 }
1574
1575 for (int i = 0; i < s_lane_count; i++) {
1576 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1577 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1578 }
1579
1580 for (int i = 0; i < h_lane_count; i++) {
1581 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1582 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1583 }
1584 }
1585}
1586
1587TEST_SVE(sve_uqinc_uqdec_p_vector) {
1588 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1589 START();
1590
1591 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1592 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1593 Initialise(&masm, p0.VnB(), p0_inputs);
1594
1595 // Check that saturation behaves correctly.
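 // Illustrative arithmetic, matching the expected values further down: with
 // two active D lanes, Uqdecp computes 1 - 2 and 0 - 2, both of which clamp
 // to 0, while Uqincp computes UINT64_MAX + 2, which clamps to UINT64_MAX.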
1596
1597 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1598 InsrHelper(&masm, z0.VnD(), z0_inputs);
1599
1600 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1601 InsrHelper(&masm, z1.VnD(), z1_inputs);
1602
1603 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1604 InsrHelper(&masm, z2.VnS(), z2_inputs);
1605
1606 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1607 InsrHelper(&masm, z3.VnH(), z3_inputs);
1608
1609 // The MacroAssembler implements non-destructive operations using movprfx.
1610 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1611 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1612 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1613 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1614
1615 __ Uqincp(z14.VnD(), p0, z0.VnD());
1616 __ Uqincp(z15.VnD(), p0, z1.VnD());
1617 __ Uqincp(z16.VnS(), p0, z2.VnS());
1618 __ Uqincp(z17.VnH(), p0, z3.VnH());
1619
1620 // Also test destructive forms.
1621 __ Mov(z4, z0);
1622 __ Mov(z5, z1);
1623 __ Mov(z6, z2);
1624 __ Mov(z7, z3);
1625
1626 __ Uqdecp(z0.VnD(), p0);
1627 __ Uqdecp(z1.VnD(), p0);
1628 __ Uqdecp(z2.VnS(), p0);
1629 __ Uqdecp(z3.VnH(), p0);
1630
1631 __ Uqincp(z4.VnD(), p0);
1632 __ Uqincp(z5.VnD(), p0);
1633 __ Uqincp(z6.VnS(), p0);
1634 __ Uqincp(z7.VnH(), p0);
1635
1636 END();
1637 if (CAN_RUN()) {
1638 RUN();
1639
1640 // z0_inputs[...] - number of active D lanes (2)
1641 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1642 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1643
1644 // z1_inputs[...] - number of active D lanes (2)
1645 uint64_t z1_expected[] = {0x12345678ffffff28,
1646 0,
1647 0xfffffffffffffffd,
1648 0x7ffffffffffffffd};
1649 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1650
1651 // z2_inputs[...] - number of active S lanes (3)
1652 uint32_t z2_expected[] =
1653 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1654 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1655
1656 // z3_inputs[...] - number of active H lanes (5)
1657 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1658 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1659
1660 // z0_inputs[...] + number of active D lanes (2)
1661 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1662 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1663
1664 // z1_inputs[...] + number of active D lanes (2)
1665 uint64_t z5_expected[] = {0x12345678ffffff2c,
1666 2,
1667 UINT64_MAX,
1668 0x8000000000000001};
1669 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1670
1671 // z2_inputs[...] + number of active S lanes (3)
1672 uint32_t z6_expected[] =
1673 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1674 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1675
1676 // z3_inputs[...] + number of active H lanes (5)
1677 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1678 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1679
1680 // Check that the non-destructive macros produced the same results.
1681 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1682 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1683 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1684 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1685 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1686 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1687 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1688 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1689 }
1690}
1691
1692TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1693 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1694 START();
1695
1696 // With an all-true predicate, these instructions increment or decrement by
1697 // the vector length.
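 // The start values below sit well away from zero and from the type maxima,
 // so no unsigned clamping occurs: each lane is expected to be the start
 // value plus or minus the relevant lane count.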
1698 __ Ptrue(p15.VnB());
1699
1700 __ Mov(x0, 0x1234567800000000);
1701 __ Mov(x1, 0x12340000);
1702 __ Mov(x2, 0x1200);
1703
1704 __ Dup(z0.VnD(), x0);
1705 __ Uqdecp(z0.VnD(), p15);
1706
1707 __ Dup(z1.VnS(), x1);
1708 __ Uqdecp(z1.VnS(), p15);
1709
1710 __ Dup(z2.VnH(), x2);
1711 __ Uqdecp(z2.VnH(), p15);
1712
1713 __ Dup(z3.VnD(), x0);
1714 __ Uqincp(z3.VnD(), p15);
1715
1716 __ Dup(z4.VnS(), x1);
1717 __ Uqincp(z4.VnS(), p15);
1718
1719 __ Dup(z5.VnH(), x2);
1720 __ Uqincp(z5.VnH(), p15);
1721
1722 END();
1723 if (CAN_RUN()) {
1724 RUN();
1725
1726 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1727 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1728 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1729
1730 for (int i = 0; i < d_lane_count; i++) {
1731 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1732 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1733 }
1734
1735 for (int i = 0; i < s_lane_count; i++) {
1736 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1737 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1738 }
1739
1740 for (int i = 0; i < h_lane_count; i++) {
1741 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1742 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1743 }
1744 }
1745}
1746
1747TEST_SVE(sve_index) {
1748 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1749 START();
1750
1751 // Simple cases.
1752 __ Index(z0.VnB(), 0, 1);
1753 __ Index(z1.VnH(), 1, 1);
1754 __ Index(z2.VnS(), 2, 1);
1755 __ Index(z3.VnD(), 3, 1);
1756
1757 // Synthesised immediates.
1758 __ Index(z4.VnB(), 42, -1);
1759 __ Index(z5.VnH(), -1, 42);
1760 __ Index(z6.VnS(), 42, 42);
1761
1762 // Register arguments.
1763 __ Mov(x0, 42);
1764 __ Mov(x1, -3);
1765 __ Index(z10.VnD(), x0, x1);
1766 __ Index(z11.VnB(), w0, w1);
1767 // The register size should correspond to the lane size, but VIXL allows any
1768 // register at least as big as the lane size.
1769 __ Index(z12.VnB(), x0, x1);
1770 __ Index(z13.VnH(), w0, x1);
1771 __ Index(z14.VnS(), x0, w1);
1772
1773 // Integer overflow.
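 // Index results are truncated to the lane width, so they wrap on overflow.
 // For example, z20.VnB() starts at 0xfd and steps by 2, giving the lane
 // sequence 0xfd, 0xff, 0x01, 0x03, 0x05, ... (see expected_z20 below).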
1774 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1775 __ Index(z21.VnH(), 7, -3);
1776 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1777 __ Index(z23.VnD(), INT64_MIN + 6, -7);
1778
1779 END();
1780
1781 if (CAN_RUN()) {
1782 RUN();
1783
1784 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1785 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1786 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1787 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1788
1789 uint64_t b_mask = GetUintMask(kBRegSize);
1790 uint64_t h_mask = GetUintMask(kHRegSize);
1791 uint64_t s_mask = GetUintMask(kSRegSize);
1792 uint64_t d_mask = GetUintMask(kDRegSize);
1793
1794 // Simple cases.
1795 for (int i = 0; i < b_lane_count; i++) {
1796 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1797 }
1798 for (int i = 0; i < h_lane_count; i++) {
1799 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1800 }
1801 for (int i = 0; i < s_lane_count; i++) {
1802 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1803 }
1804 for (int i = 0; i < d_lane_count; i++) {
1805 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1806 }
1807
1808 // Synthesised immediates.
1809 for (int i = 0; i < b_lane_count; i++) {
1810 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1811 }
1812 for (int i = 0; i < h_lane_count; i++) {
1813 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1814 }
1815 for (int i = 0; i < s_lane_count; i++) {
1816 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1817 }
1818
1819 // Register arguments.
1820 for (int i = 0; i < d_lane_count; i++) {
1821 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1822 }
1823 for (int i = 0; i < b_lane_count; i++) {
1824 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1825 }
1826 for (int i = 0; i < b_lane_count; i++) {
1827 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1828 }
1829 for (int i = 0; i < h_lane_count; i++) {
1830 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1831 }
1832 for (int i = 0; i < s_lane_count; i++) {
1833 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1834 }
1835
1836 // Integer overflow.
1837 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1838 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1839 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1840 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1841 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1842 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1843 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1844 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1845 }
1846}
1847
1848TEST(sve_int_compare_count_and_limit_scalars) {
1849 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1850 START();
1851
1852 __ Mov(w20, 0xfffffffd);
1853 __ Mov(w21, 0xffffffff);
1854
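 // Whilele performs a signed comparison, so with w20 = -3 and w21 = -1 only
 // the counter values -3, -2 and -1 satisfy the condition and the first three
 // lanes become active. (Whilels further down uses an unsigned comparison,
 // and every 32-bit value is <= 0xffffffff, so it activates all lanes.)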
1855 __ Whilele(p0.VnB(), w20, w21);
1856 __ Mrs(x0, NZCV);
1857 __ Whilele(p1.VnH(), w20, w21);
1858 __ Mrs(x1, NZCV);
1859
1860 __ Mov(w20, 0xffffffff);
1861 __ Mov(w21, 0x00000000);
1862
1863 __ Whilelt(p2.VnS(), w20, w21);
1864 __ Mrs(x2, NZCV);
1865 __ Whilelt(p3.VnD(), w20, w21);
1866 __ Mrs(x3, NZCV);
1867
1868 __ Mov(w20, 0xfffffffd);
1869 __ Mov(w21, 0xffffffff);
1870
1871 __ Whilels(p4.VnB(), w20, w21);
1872 __ Mrs(x4, NZCV);
1873 __ Whilels(p5.VnH(), w20, w21);
1874 __ Mrs(x5, NZCV);
1875
1876 __ Mov(w20, 0xffffffff);
1877 __ Mov(w21, 0x00000000);
1878
1879 __ Whilelo(p6.VnS(), w20, w21);
1880 __ Mrs(x6, NZCV);
1881 __ Whilelo(p7.VnD(), w20, w21);
1882 __ Mrs(x7, NZCV);
1883
1884 __ Mov(x20, 0xfffffffffffffffd);
1885 __ Mov(x21, 0xffffffffffffffff);
1886
1887 __ Whilele(p8.VnB(), x20, x21);
1888 __ Mrs(x8, NZCV);
1889 __ Whilele(p9.VnH(), x20, x21);
1890 __ Mrs(x9, NZCV);
1891
1892 __ Mov(x20, 0xffffffffffffffff);
1893 __ Mov(x21, 0x0000000000000000);
1894
1895 __ Whilelt(p10.VnS(), x20, x21);
1896 __ Mrs(x10, NZCV);
1897 __ Whilelt(p11.VnD(), x20, x21);
1898 __ Mrs(x11, NZCV);
1899
1900 __ Mov(x20, 0xfffffffffffffffd);
1901 __ Mov(x21, 0xffffffffffffffff);
1902
1903 __ Whilels(p12.VnB(), x20, x21);
1904 __ Mrs(x12, NZCV);
1905 __ Whilels(p13.VnH(), x20, x21);
1906 __ Mrs(x13, NZCV);
1907
1908 __ Mov(x20, 0xffffffffffffffff);
1909 __ Mov(x21, 0x0000000000000000);
1910
1911 __ Whilelo(p14.VnS(), x20, x21);
1912 __ Mrs(x14, NZCV);
1913 __ Whilelo(p15.VnD(), x20, x21);
1914 __ Mrs(x15, NZCV);
1915
1916 END();
1917
1918 if (CAN_RUN()) {
1919 RUN();
1920
1921 // 0b...00000000'00000111
1922 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1923 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1924
1925 // 0b...00000000'00010101
1926 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1927 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1928
1929 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1930 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1931
1932 int p3_expected[] = {0x00, 0x01};
1933 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1934
1935 // 0b...11111111'11111111
1936 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1937 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1938
1939 // 0b...01010101'01010101
1940 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1941 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1942
1943 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1944 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1945
1946 int p7_expected[] = {0x00, 0x00};
1947 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1948
1949 // 0b...00000000'00000111
1950 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1951 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1952
1953 // 0b...00000000'00010101
1954 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1955 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1956
1957 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1958 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1959
1960 int p11_expected[] = {0x00, 0x01};
1961 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1962
1963 // 0b...11111111'11111111
1964 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1965 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1966
1967 // 0b...01010101'01010101
1968 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1969 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1970
1971 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1972 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1973
1974 int p15_expected[] = {0x00, 0x00};
1975 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1976
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1979 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1980 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1981 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1982 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1983 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1984 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1987 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1988 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1989 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1990 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1991 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1992 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1993 }
1994}
1995
1996TEST(sve_int_compare_vectors_signed_imm) {
1997 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1998 START();
1999
2000 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
2001 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2002 InsrHelper(&masm, z13.VnB(), z13_inputs);
2003 Initialise(&masm, p0.VnB(), mask_inputs1);
2004
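 // With Zeroing predication, lanes that are inactive in p0 are cleared in the
 // result, so they produce 0 regardless of the comparison outcome.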
2005 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2006 __ Mrs(x2, NZCV);
2007 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2008
2009 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2010 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2011 InsrHelper(&masm, z14.VnH(), z14_inputs);
2012 Initialise(&masm, p0.VnH(), mask_inputs2);
2013
2014 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2015 __ Mrs(x4, NZCV);
2016 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2017
2018 int z15_inputs[] = {0, 1, -1, INT_MIN};
2019 int mask_inputs3[] = {0, 1, 1, 1};
2020 InsrHelper(&masm, z15.VnS(), z15_inputs);
2021 Initialise(&masm, p0.VnS(), mask_inputs3);
2022
2023 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2024 __ Mrs(x6, NZCV);
2025 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2026
2027 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2028 __ Mrs(x8, NZCV);
2029 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2030
2031 int64_t z16_inputs[] = {0, -1};
2032 int mask_inputs4[] = {1, 1};
2033 InsrHelper(&masm, z16.VnD(), z16_inputs);
2034 Initialise(&masm, p0.VnD(), mask_inputs4);
2035
2036 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2037 __ Mrs(x10, NZCV);
2038 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2039
2040 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2041 __ Mrs(x12, NZCV);
2042 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2043
2044 END();
2045
2046 if (CAN_RUN()) {
2047 RUN();
2048
2049 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2050 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2051
2052 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2053 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2054
2055 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2056 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2057
2058 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2059 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2060
2061 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2062 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2063
2064 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2065 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2066
2067 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2068 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2069
2070 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2071 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2072
2073 int p10_expected[] = {0x00, 0x01};
2074 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2075
2076 int p11_expected[] = {0x00, 0x00};
2077 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2078
2079 int p12_expected[] = {0x01, 0x00};
2080 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2081
2082 int p13_expected[] = {0x01, 0x01};
2083 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2084
2085 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2086 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2087 ASSERT_EQUAL_32(NoFlag, w6);
2088 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2089 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2090 ASSERT_EQUAL_32(NoFlag, w12);
2091 }
2092}
2093
2094TEST(sve_int_compare_vectors_unsigned_imm) {
2095 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2096 START();
2097
2098 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2099 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2100 InsrHelper(&masm, z13.VnB(), src1_inputs);
2101 Initialise(&masm, p0.VnB(), mask_inputs1);
2102
2103 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2104 __ Mrs(x2, NZCV);
2105 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2106
2107 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2108 int mask_inputs2[] = {1, 1, 1, 1, 0};
2109 InsrHelper(&masm, z13.VnH(), src2_inputs);
2110 Initialise(&masm, p0.VnH(), mask_inputs2);
2111
2112 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2113 __ Mrs(x4, NZCV);
2114 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2115
2116 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2117 int mask_inputs3[] = {1, 1, 1, 1};
2118 InsrHelper(&masm, z13.VnS(), src3_inputs);
2119 Initialise(&masm, p0.VnS(), mask_inputs3);
2120
2121 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2122 __ Mrs(x6, NZCV);
2123 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2124
2125 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2126 int mask_inputs4[] = {1, 1};
2127 InsrHelper(&masm, z13.VnD(), src4_inputs);
2128 Initialise(&masm, p0.VnD(), mask_inputs4);
2129
2130 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2131 __ Mrs(x8, NZCV);
2132 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2133
2134 END();
2135
2136 if (CAN_RUN()) {
2137 RUN();
2138
2139 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2140 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2141
2142 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2143 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2144
2145 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2146 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2147
2148 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2149 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2150
2151 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2152 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2153
2154 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2155 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2156
2157 int p8_expected[] = {0x00, 0x01};
2158 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2159
2160 int p9_expected[] = {0x00, 0x01};
2161 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2162
2163 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2164 ASSERT_EQUAL_32(NoFlag, w4);
2165 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2166 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2167 }
2168}
2169
2170TEST(sve_int_compare_conditionally_terminate_scalars) {
2171 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2172 START();
2173
2174 __ Mov(x0, 0xfedcba9887654321);
2175 __ Mov(x1, 0x1000100010001000);
2176
2177 __ Ctermeq(w0, w0);
2178 __ Mrs(x2, NZCV);
2179 __ Ctermeq(x0, x1);
2180 __ Mrs(x3, NZCV);
2181 __ Ctermne(x0, x0);
2182 __ Mrs(x4, NZCV);
2183 __ Ctermne(w0, w1);
2184 __ Mrs(x5, NZCV);
2185
2186 END();
2187
2188 if (CAN_RUN()) {
2189 RUN();
2190
2191 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2192 ASSERT_EQUAL_32(VFlag, w3);
2193 ASSERT_EQUAL_32(VFlag, w4);
2194 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2195 }
2196}
2197
2198// Work out what the architectural `PredTest` pseudocode should produce for the
2199// given result and governing predicate.
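// (PredTest reports FIRST if the first active lane of pd is true, NONE if no
// active lane of pd is true, and !LAST if the last active lane of pd is false;
// the flags assembled below mirror that behaviour.)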
2200template <typename Tg, typename Td, int N>
2201static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2202 const Tg (&pg)[N],
2203 int vl) {
2204 int first = -1;
2205 int last = -1;
2206 bool any_active = false;
2207
2208 // Only consider potentially-active lanes.
2209 int start = (N > vl) ? (N - vl) : 0;
2210 for (int i = start; i < N; i++) {
2211 if ((pg[i] & 1) == 1) {
2212 // Look for the first and last active lanes.
2213 // Note that the 'first' lane is the one with the highest index.
2214 if (last < 0) last = i;
2215 first = i;
2216 // Look for any active lanes that are also active in pd.
2217 if ((pd[i] & 1) == 1) any_active = true;
2218 }
2219 }
2220
2221 uint32_t flags = 0;
2222 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2223 if (!any_active) flags |= SVENoneFlag;
2224 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2225 return static_cast<StatusFlags>(flags);
2226}
2227
2228typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2229 const PRegister& pg,
2230 const PRegisterWithLaneSize& pn);
2231template <typename Tg, typename Tn, typename Td>
2232static void PfirstPnextHelper(Test* config,
2233 PfirstPnextFn macro,
2234 unsigned lane_size_in_bits,
2235 const Tg& pg_inputs,
2236 const Tn& pn_inputs,
2237 const Td& pd_expected) {
2238 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2239 START();
2240
2241 PRegister pg = p15;
2242 PRegister pn = p14;
2243 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2244 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2245
2246 // Initialise NZCV to an impossible value, to check that we actually write it.
2247 __ Mov(x10, NZCVFlag);
2248
2249 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2250 // the Assembler.
2251 __ Msr(NZCV, x10);
2252 __ Mov(p0, pn);
2253 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2254 pg,
2255 p0.WithLaneSize(lane_size_in_bits));
2256 __ Mrs(x0, NZCV);
2257
2258 // The MacroAssembler supports non-destructive use.
2259 __ Msr(NZCV, x10);
2260 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2261 pg,
2262 pn.WithLaneSize(lane_size_in_bits));
2263 __ Mrs(x1, NZCV);
2264
2265 // If pd.Aliases(pg) the macro requires a scratch register.
2266 {
2267 UseScratchRegisterScope temps(&masm);
2268 temps.Include(p13);
2269 __ Msr(NZCV, x10);
2270 __ Mov(p2, p15);
2271 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2272 p2,
2273 pn.WithLaneSize(lane_size_in_bits));
2274 __ Mrs(x2, NZCV);
2275 }
2276
2277 END();
2278
2279 if (CAN_RUN()) {
2280 RUN();
2281
2282 // Check that the inputs weren't modified.
2283 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2284 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2285
2286 // Check the primary operation.
2287 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2288 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2289 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2290
2291 // Check that the flags were properly set.
2292 StatusFlags nzcv_expected =
2293 GetPredTestFlags(pd_expected,
2294 pg_inputs,
2295 core.GetSVELaneCount(kBRegSize));
2296 ASSERT_EQUAL_64(nzcv_expected, x0);
2297 ASSERT_EQUAL_64(nzcv_expected, x1);
2298 ASSERT_EQUAL_64(nzcv_expected, x2);
2299 }
2300}
2301
2302template <typename Tg, typename Tn, typename Td>
2303static void PfirstHelper(Test* config,
2304 const Tg& pg_inputs,
2305 const Tn& pn_inputs,
2306 const Td& pd_expected) {
2307 PfirstPnextHelper(config,
2308 &MacroAssembler::Pfirst,
2309 kBRegSize, // pfirst only accepts B-sized lanes.
2310 pg_inputs,
2311 pn_inputs,
2312 pd_expected);
2313}
2314
2315template <typename Tg, typename Tn, typename Td>
2316static void PnextHelper(Test* config,
2317 unsigned lane_size_in_bits,
2318 const Tg& pg_inputs,
2319 const Tn& pn_inputs,
2320 const Td& pd_expected) {
2321 PfirstPnextHelper(config,
2322 &MacroAssembler::Pnext,
2323 lane_size_in_bits,
2324 pg_inputs,
2325 pn_inputs,
2326 pd_expected);
2327}
2328
2329TEST_SVE(sve_pfirst) {
2330 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2331 // large VL), but few enough to make the test easy to read.
2332 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2333 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2334 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2335 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2336 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2337 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2338
2339 // Pfirst finds the first active lane in pg, and activates the corresponding
2340 // lane in pn (if it isn't already active).
2341
2342 // The first active lane in in1 is here. |
2343 // v
2344 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2345 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2346 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2347 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2348 PfirstHelper(config, in1, in0, exp10);
2349 PfirstHelper(config, in1, in2, exp12);
2350 PfirstHelper(config, in1, in3, exp13);
2351 PfirstHelper(config, in1, in4, exp14);
2352
2353 // The first active lane in in2 is here. |
2354 // v
2355 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2356 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2357 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2358 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2359 PfirstHelper(config, in2, in0, exp20);
2360 PfirstHelper(config, in2, in1, exp21);
2361 PfirstHelper(config, in2, in3, exp23);
2362 PfirstHelper(config, in2, in4, exp24);
2363
2364 // The first active lane in in3 is here. |
2365 // v
2366 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2367 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2368 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2369 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2370 PfirstHelper(config, in3, in0, exp30);
2371 PfirstHelper(config, in3, in1, exp31);
2372 PfirstHelper(config, in3, in2, exp32);
2373 PfirstHelper(config, in3, in4, exp34);
2374
2375 // | The first active lane in in4 is here.
2376 // v
2377 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2378 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2379 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2380 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2381 PfirstHelper(config, in4, in0, exp40);
2382 PfirstHelper(config, in4, in1, exp41);
2383 PfirstHelper(config, in4, in2, exp42);
2384 PfirstHelper(config, in4, in3, exp43);
2385
2386 // If pg is all inactive, the input is passed through unchanged.
2387 PfirstHelper(config, in0, in0, in0);
2388 PfirstHelper(config, in0, in1, in1);
2389 PfirstHelper(config, in0, in2, in2);
2390 PfirstHelper(config, in0, in3, in3);
2391
2392 // If the values of pg and pn match, the value is passed through unchanged.
2393 PfirstHelper(config, in0, in0, in0);
2394 PfirstHelper(config, in1, in1, in1);
2395 PfirstHelper(config, in2, in2, in2);
2396 PfirstHelper(config, in3, in3, in3);
2397}
2398
2399TEST_SVE(sve_pfirst_alias) {
2400 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2401 START();
2402
2403 // Check that the Simulator behaves correctly when all arguments are aliased.
2404 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2405 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2406 int in_s[] = {0, 1, 1, 0};
2407 int in_d[] = {1, 1};
2408
2409 Initialise(&masm, p0.VnB(), in_b);
2410 Initialise(&masm, p1.VnH(), in_h);
2411 Initialise(&masm, p2.VnS(), in_s);
2412 Initialise(&masm, p3.VnD(), in_d);
2413
2414 // Initialise NZCV to an impossible value, to check that we actually write it.
2415 __ Mov(x10, NZCVFlag);
2416
2417 __ Msr(NZCV, x10);
2418 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2419 __ Mrs(x0, NZCV);
2420
2421 __ Msr(NZCV, x10);
2422 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2423 __ Mrs(x1, NZCV);
2424
2425 __ Msr(NZCV, x10);
2426 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2427 __ Mrs(x2, NZCV);
2428
2429 __ Msr(NZCV, x10);
2430 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2431 __ Mrs(x3, NZCV);
2432
2433 END();
2434
2435 if (CAN_RUN()) {
2436 RUN();
2437
2438 // The first lane from pg is already active in pdn, so the P register should
2439 // be unchanged.
2440 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2441 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2442 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2443 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2444
2445 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2446 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2447 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2448 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2449 }
2450}
2451
2452TEST_SVE(sve_pnext_b) {
2453 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2454 // (to check propagation if we have a large VL), but few enough to make the
2455 // test easy to read.
2456 // For now, we just use kPRegMinSize so that the test works anywhere.
2457 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2458 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2459 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2460 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2461 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2462
2463 // Pnext activates the next element that is true in pg, after the last-active
2464 // element in pn. If all pn elements are false (as in in0), it starts looking
2465 // at element 0.
2466
2467 // There are no active lanes in in0, so the result is simply the first active
2468 // lane from pg.
2469 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2470 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2471 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2472 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2473 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2474
2475 // The last active lane in in1 is here. |
2476 // v
2477 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2478 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2480 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2481 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2482
2483 // | The last active lane in in2 is here.
2484 // v
2485 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2486 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2487 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2488 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2489 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2490
2491 // | The last active lane in in3 is here.
2492 // v
2493 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2494 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2496 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2497 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2498
2499 // | The last active lane in in4 is here.
2500 // v
2501 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2504 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2505 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2506
2507 PnextHelper(config, kBRegSize, in0, in0, exp00);
2508 PnextHelper(config, kBRegSize, in1, in0, exp10);
2509 PnextHelper(config, kBRegSize, in2, in0, exp20);
2510 PnextHelper(config, kBRegSize, in3, in0, exp30);
2511 PnextHelper(config, kBRegSize, in4, in0, exp40);
2512
2513 PnextHelper(config, kBRegSize, in0, in1, exp01);
2514 PnextHelper(config, kBRegSize, in1, in1, exp11);
2515 PnextHelper(config, kBRegSize, in2, in1, exp21);
2516 PnextHelper(config, kBRegSize, in3, in1, exp31);
2517 PnextHelper(config, kBRegSize, in4, in1, exp41);
2518
2519 PnextHelper(config, kBRegSize, in0, in2, exp02);
2520 PnextHelper(config, kBRegSize, in1, in2, exp12);
2521 PnextHelper(config, kBRegSize, in2, in2, exp22);
2522 PnextHelper(config, kBRegSize, in3, in2, exp32);
2523 PnextHelper(config, kBRegSize, in4, in2, exp42);
2524
2525 PnextHelper(config, kBRegSize, in0, in3, exp03);
2526 PnextHelper(config, kBRegSize, in1, in3, exp13);
2527 PnextHelper(config, kBRegSize, in2, in3, exp23);
2528 PnextHelper(config, kBRegSize, in3, in3, exp33);
2529 PnextHelper(config, kBRegSize, in4, in3, exp43);
2530
2531 PnextHelper(config, kBRegSize, in0, in4, exp04);
2532 PnextHelper(config, kBRegSize, in1, in4, exp14);
2533 PnextHelper(config, kBRegSize, in2, in4, exp24);
2534 PnextHelper(config, kBRegSize, in3, in4, exp34);
2535 PnextHelper(config, kBRegSize, in4, in4, exp44);
2536}
2537
2538TEST_SVE(sve_pnext_h) {
2539 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2540 // (to check propagation if we have a large VL), but few enough to make the
2541 // test easy to read.
2542 // For now, we just use kPRegMinSize so that the test works anywhere.
2543 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2544 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2545 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2546 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2547 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2548
2549 // Pnext activates the next element that is true in pg, after the last-active
2550 // element in pn. If all pn elements are false (as in in0), it starts looking
2551 // at element 0.
2552 //
2553 // As for other SVE instructions, elements are only considered to be active if
2554 // the _first_ bit in each field is one. Other bits are ignored.
2555
2556 // There are no active lanes in in0, so the result is simply the first active
2557 // lane from pg.
2558 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2559 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2560 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2561 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2562 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2563
2564 // | The last active lane in in1 is here.
2565 // v
2566 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2567 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2568 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2569 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2570 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2571
2572 // | The last active lane in in2 is here.
2573 // v
2574 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2575 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2576 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2577 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2578 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2579
2580 // | The last active lane in in3 is here.
2581 // v
2582 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2583 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2584 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2585 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2586 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2587
2588 // | The last active lane in in4 is here.
2589 // v
2590 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2592 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2593 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2594 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2595
2596 PnextHelper(config, kHRegSize, in0, in0, exp00);
2597 PnextHelper(config, kHRegSize, in1, in0, exp10);
2598 PnextHelper(config, kHRegSize, in2, in0, exp20);
2599 PnextHelper(config, kHRegSize, in3, in0, exp30);
2600 PnextHelper(config, kHRegSize, in4, in0, exp40);
2601
2602 PnextHelper(config, kHRegSize, in0, in1, exp01);
2603 PnextHelper(config, kHRegSize, in1, in1, exp11);
2604 PnextHelper(config, kHRegSize, in2, in1, exp21);
2605 PnextHelper(config, kHRegSize, in3, in1, exp31);
2606 PnextHelper(config, kHRegSize, in4, in1, exp41);
2607
2608 PnextHelper(config, kHRegSize, in0, in2, exp02);
2609 PnextHelper(config, kHRegSize, in1, in2, exp12);
2610 PnextHelper(config, kHRegSize, in2, in2, exp22);
2611 PnextHelper(config, kHRegSize, in3, in2, exp32);
2612 PnextHelper(config, kHRegSize, in4, in2, exp42);
2613
2614 PnextHelper(config, kHRegSize, in0, in3, exp03);
2615 PnextHelper(config, kHRegSize, in1, in3, exp13);
2616 PnextHelper(config, kHRegSize, in2, in3, exp23);
2617 PnextHelper(config, kHRegSize, in3, in3, exp33);
2618 PnextHelper(config, kHRegSize, in4, in3, exp43);
2619
2620 PnextHelper(config, kHRegSize, in0, in4, exp04);
2621 PnextHelper(config, kHRegSize, in1, in4, exp14);
2622 PnextHelper(config, kHRegSize, in2, in4, exp24);
2623 PnextHelper(config, kHRegSize, in3, in4, exp34);
2624 PnextHelper(config, kHRegSize, in4, in4, exp44);
2625}
2626
2627TEST_SVE(sve_pnext_s) {
2628 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2629 // (to check propagation if we have a large VL), but few enough to make the
2630 // test easy to read.
2631 // For now, we just use kPRegMinSize so that the test works anywhere.
2632 int in0[] = {0xe, 0xc, 0x8, 0x0};
2633 int in1[] = {0x0, 0x2, 0x0, 0x1};
2634 int in2[] = {0x0, 0x1, 0xf, 0x0};
2635 int in3[] = {0xf, 0x0, 0x0, 0x0};
2636
2637 // Pnext activates the next element that is true in pg, after the last-active
2638 // element in pn. If all pn elements are false (as in in0), it starts looking
2639 // at element 0.
2640 //
2641 // As for other SVE instructions, elements are only considered to be active if
2642 // the _first_ bit in each field is one. Other bits are ignored.
2643
2644 // There are no active lanes in in0, so the result is simply the first active
2645 // lane from pg.
2646 int exp00[] = {0, 0, 0, 0};
2647 int exp10[] = {0, 0, 0, 1};
2648 int exp20[] = {0, 0, 1, 0};
2649 int exp30[] = {1, 0, 0, 0};
2650
2651 // | The last active lane in in1 is here.
2652 // v
2653 int exp01[] = {0, 0, 0, 0};
2654 int exp11[] = {0, 0, 0, 0};
2655 int exp21[] = {0, 0, 1, 0};
2656 int exp31[] = {1, 0, 0, 0};
2657
2658 // | The last active lane in in2 is here.
2659 // v
2660 int exp02[] = {0, 0, 0, 0};
2661 int exp12[] = {0, 0, 0, 0};
2662 int exp22[] = {0, 0, 0, 0};
2663 int exp32[] = {1, 0, 0, 0};
2664
2665 // | The last active lane in in3 is here.
2666 // v
2667 int exp03[] = {0, 0, 0, 0};
2668 int exp13[] = {0, 0, 0, 0};
2669 int exp23[] = {0, 0, 0, 0};
2670 int exp33[] = {0, 0, 0, 0};
2671
2672 PnextHelper(config, kSRegSize, in0, in0, exp00);
2673 PnextHelper(config, kSRegSize, in1, in0, exp10);
2674 PnextHelper(config, kSRegSize, in2, in0, exp20);
2675 PnextHelper(config, kSRegSize, in3, in0, exp30);
2676
2677 PnextHelper(config, kSRegSize, in0, in1, exp01);
2678 PnextHelper(config, kSRegSize, in1, in1, exp11);
2679 PnextHelper(config, kSRegSize, in2, in1, exp21);
2680 PnextHelper(config, kSRegSize, in3, in1, exp31);
2681
2682 PnextHelper(config, kSRegSize, in0, in2, exp02);
2683 PnextHelper(config, kSRegSize, in1, in2, exp12);
2684 PnextHelper(config, kSRegSize, in2, in2, exp22);
2685 PnextHelper(config, kSRegSize, in3, in2, exp32);
2686
2687 PnextHelper(config, kSRegSize, in0, in3, exp03);
2688 PnextHelper(config, kSRegSize, in1, in3, exp13);
2689 PnextHelper(config, kSRegSize, in2, in3, exp23);
2690 PnextHelper(config, kSRegSize, in3, in3, exp33);
2691}
2692
2693TEST_SVE(sve_pnext_d) {
2694 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2695 // (to check propagation if we have a large VL), but few enough to make the
2696 // test easy to read.
2697 // For now, we just use kPRegMinSize so that the test works anywhere.
2698 int in0[] = {0xfe, 0xf0};
2699 int in1[] = {0x00, 0x55};
2700 int in2[] = {0x33, 0xff};
2701
2702 // Pnext activates the next element that is true in pg, after the last-active
2703 // element in pn. If all pn elements are false (as in in0), it starts looking
2704 // at element 0.
2705 //
2706 // As for other SVE instructions, elements are only considered to be active if
2707 // the _first_ bit in each field is one. Other bits are ignored.
2708
2709 // There are no active lanes in in0, so the result is simply the first active
2710 // lane from pg.
2711 int exp00[] = {0, 0};
2712 int exp10[] = {0, 1};
2713 int exp20[] = {0, 1};
2714
2715 // | The last active lane in in1 is here.
2716 // v
2717 int exp01[] = {0, 0};
2718 int exp11[] = {0, 0};
2719 int exp21[] = {1, 0};
2720
2721 // | The last active lane in in2 is here.
2722 // v
2723 int exp02[] = {0, 0};
2724 int exp12[] = {0, 0};
2725 int exp22[] = {0, 0};
2726
2727 PnextHelper(config, kDRegSize, in0, in0, exp00);
2728 PnextHelper(config, kDRegSize, in1, in0, exp10);
2729 PnextHelper(config, kDRegSize, in2, in0, exp20);
2730
2731 PnextHelper(config, kDRegSize, in0, in1, exp01);
2732 PnextHelper(config, kDRegSize, in1, in1, exp11);
2733 PnextHelper(config, kDRegSize, in2, in1, exp21);
2734
2735 PnextHelper(config, kDRegSize, in0, in2, exp02);
2736 PnextHelper(config, kDRegSize, in1, in2, exp12);
2737 PnextHelper(config, kDRegSize, in2, in2, exp22);
2738}
2739
2740TEST_SVE(sve_pnext_alias) {
2741 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2742 START();
2743
2744 // Check that the Simulator behaves correctly when all arguments are aliased.
2745 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2746 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2747 int in_s[] = {0, 1, 1, 0};
2748 int in_d[] = {1, 1};
2749
2750 Initialise(&masm, p0.VnB(), in_b);
2751 Initialise(&masm, p1.VnH(), in_h);
2752 Initialise(&masm, p2.VnS(), in_s);
2753 Initialise(&masm, p3.VnD(), in_d);
2754
2755 // Initialise NZCV to an impossible value, to check that we actually write it.
2756 __ Mov(x10, NZCVFlag);
2757
2758 __ Msr(NZCV, x10);
2759 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2760 __ Mrs(x0, NZCV);
2761
2762 __ Msr(NZCV, x10);
2763 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2764 __ Mrs(x1, NZCV);
2765
2766 __ Msr(NZCV, x10);
2767 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2768 __ Mrs(x2, NZCV);
2769
2770 __ Msr(NZCV, x10);
2771 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2772 __ Mrs(x3, NZCV);
2773
2774 END();
2775
2776 if (CAN_RUN()) {
2777 RUN();
2778
2779 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2780 // active lane in pdn, so the result should always be zero.
2781 ASSERT_EQUAL_SVE(0, p0.VnB());
2782 ASSERT_EQUAL_SVE(0, p1.VnH());
2783 ASSERT_EQUAL_SVE(0, p2.VnS());
2784 ASSERT_EQUAL_SVE(0, p3.VnD());
2785
2786 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2787 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2788 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2789 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2790 }
2791}
2792
2793static void PtrueHelper(Test* config,
2794 unsigned lane_size_in_bits,
2795 FlagsUpdate s = LeaveFlags) {
2796 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2797 START();
2798
2799 PRegisterWithLaneSize p[kNumberOfPRegisters];
2800 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2801 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2802 }
2803
2804 // Initialise NZCV to an impossible value, to check that we actually write it.
2805 StatusFlags nzcv_unmodified = NZCVFlag;
2806 __ Mov(x20, nzcv_unmodified);
2807
2808 // We don't have enough registers to conveniently test every pattern, so take
2809 // samples from each group.
2810 __ Msr(NZCV, x20);
2811 __ Ptrue(p[0], SVE_POW2, s);
2812 __ Mrs(x0, NZCV);
2813
2814 __ Msr(NZCV, x20);
2815 __ Ptrue(p[1], SVE_VL1, s);
2816 __ Mrs(x1, NZCV);
2817
2818 __ Msr(NZCV, x20);
2819 __ Ptrue(p[2], SVE_VL2, s);
2820 __ Mrs(x2, NZCV);
2821
2822 __ Msr(NZCV, x20);
2823 __ Ptrue(p[3], SVE_VL5, s);
2824 __ Mrs(x3, NZCV);
2825
2826 __ Msr(NZCV, x20);
2827 __ Ptrue(p[4], SVE_VL6, s);
2828 __ Mrs(x4, NZCV);
2829
2830 __ Msr(NZCV, x20);
2831 __ Ptrue(p[5], SVE_VL8, s);
2832 __ Mrs(x5, NZCV);
2833
2834 __ Msr(NZCV, x20);
2835 __ Ptrue(p[6], SVE_VL16, s);
2836 __ Mrs(x6, NZCV);
2837
2838 __ Msr(NZCV, x20);
2839 __ Ptrue(p[7], SVE_VL64, s);
2840 __ Mrs(x7, NZCV);
2841
2842 __ Msr(NZCV, x20);
2843 __ Ptrue(p[8], SVE_VL256, s);
2844 __ Mrs(x8, NZCV);
2845
2846 {
2847 // We have to use the Assembler to encode values not defined by
2848 // SVEPredicateConstraint, so call `ptrue`/`ptrues` directly.
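 // The raw immediates used here (0xe, 0x16, 0x1a and 0x1c) are unallocated
 // pattern encodings, which select zero elements, so the resulting predicates
 // are expected to be all-false (checked below).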
2849 typedef void (
2850 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2851 int pattern);
2852 AssemblePtrueFn assemble =
2853 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2854
2855 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2856 __ msr(NZCV, x20);
2857 (masm.*assemble)(p[9], 0xe);
2858 __ mrs(x9, NZCV);
2859
2860 __ msr(NZCV, x20);
2861 (masm.*assemble)(p[10], 0x16);
2862 __ mrs(x10, NZCV);
2863
2864 __ msr(NZCV, x20);
2865 (masm.*assemble)(p[11], 0x1a);
2866 __ mrs(x11, NZCV);
2867
2868 __ msr(NZCV, x20);
2869 (masm.*assemble)(p[12], 0x1c);
2870 __ mrs(x12, NZCV);
2871 }
2872
2873 __ Msr(NZCV, x20);
2874 __ Ptrue(p[13], SVE_MUL4, s);
2875 __ Mrs(x13, NZCV);
2876
2877 __ Msr(NZCV, x20);
2878 __ Ptrue(p[14], SVE_MUL3, s);
2879 __ Mrs(x14, NZCV);
2880
2881 __ Msr(NZCV, x20);
2882 __ Ptrue(p[15], SVE_ALL, s);
2883 __ Mrs(x15, NZCV);
2884
2885 END();
2886
2887 if (CAN_RUN()) {
2888 RUN();
2889
2890 int all = core.GetSVELaneCount(lane_size_in_bits);
2891 int pow2 = 1 << HighestSetBitPosition(all);
2892 int mul4 = all - (all % 4);
2893 int mul3 = all - (all % 3);
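 // For example, with 16 B-sized lanes (a 128-bit VL): pow2 = 16, mul4 = 16
 // and mul3 = 15.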
2894
2895 // Check P register results.
2896 for (int i = 0; i < all; i++) {
2897 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2898 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2899 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2900 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2901 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2902 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2903 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2904 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2905 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2906 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2907 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2908 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2909 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2910 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2911 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2912 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2913 }
2914
2915 // Check NZCV results.
2916 if (s == LeaveFlags) {
2917 // No flags should have been updated.
2918 for (int i = 0; i <= 15; i++) {
2919 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2920 }
2921 } else {
2922 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2923 StatusFlags nonzero = SVEFirstFlag;
2924
2925 // POW2
2926 ASSERT_EQUAL_64(nonzero, x0);
2927 // VL*
2928 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2929 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2930 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2931 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2932 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2933 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2934 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2935 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2936 // #uimm5
2937 ASSERT_EQUAL_64(zero, x9);
2938 ASSERT_EQUAL_64(zero, x10);
2939 ASSERT_EQUAL_64(zero, x11);
2940 ASSERT_EQUAL_64(zero, x12);
2941 // MUL*
2942 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2943 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2944 // ALL
2945 ASSERT_EQUAL_64(nonzero, x15);
2946 }
2947 }
2948}
2949
Jacob Bramleye8289202019-07-31 11:25:23 +01002950TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2951TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2952TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2953TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002954
Jacob Bramleye8289202019-07-31 11:25:23 +01002955TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2956TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2957TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2958TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002959
Jacob Bramleye8289202019-07-31 11:25:23 +01002960TEST_SVE(sve_pfalse) {
2961 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2962 START();
2963
2964 // Initialise non-zero inputs.
2965 __ Ptrue(p0.VnB());
2966 __ Ptrue(p1.VnH());
2967 __ Ptrue(p2.VnS());
2968 __ Ptrue(p3.VnD());
2969
2970 // The instruction only supports B-sized lanes, but the lane size has no
2971 // logical effect, so the MacroAssembler accepts anything.
2972 __ Pfalse(p0.VnB());
2973 __ Pfalse(p1.VnH());
2974 __ Pfalse(p2.VnS());
2975 __ Pfalse(p3.VnD());
2976
2977 END();
2978
2979 if (CAN_RUN()) {
2980 RUN();
2981
2982 ASSERT_EQUAL_SVE(0, p0.VnB());
2983 ASSERT_EQUAL_SVE(0, p1.VnB());
2984 ASSERT_EQUAL_SVE(0, p2.VnB());
2985 ASSERT_EQUAL_SVE(0, p3.VnB());
2986 }
2987}
2988
2989TEST_SVE(sve_ptest) {
2990 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2991 START();
2992
2993 // Initialise NZCV to a known (impossible) value.
2994 StatusFlags nzcv_unmodified = NZCVFlag;
2995 __ Mov(x0, nzcv_unmodified);
2996 __ Msr(NZCV, x0);
2997
2998 // Construct some test inputs.
2999 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3000 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3001 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3002 __ Pfalse(p0.VnB());
3003 __ Ptrue(p1.VnB());
3004 Initialise(&masm, p2.VnB(), in2);
3005 Initialise(&masm, p3.VnB(), in3);
3006 Initialise(&masm, p4.VnB(), in4);
3007
3008 // All-inactive pg.
3009 __ Ptest(p0, p0.VnB());
3010 __ Mrs(x0, NZCV);
3011 __ Ptest(p0, p1.VnB());
3012 __ Mrs(x1, NZCV);
3013 __ Ptest(p0, p2.VnB());
3014 __ Mrs(x2, NZCV);
3015 __ Ptest(p0, p3.VnB());
3016 __ Mrs(x3, NZCV);
3017 __ Ptest(p0, p4.VnB());
3018 __ Mrs(x4, NZCV);
3019
3020 // All-active pg.
3021 __ Ptest(p1, p0.VnB());
3022 __ Mrs(x5, NZCV);
3023 __ Ptest(p1, p1.VnB());
3024 __ Mrs(x6, NZCV);
3025 __ Ptest(p1, p2.VnB());
3026 __ Mrs(x7, NZCV);
3027 __ Ptest(p1, p3.VnB());
3028 __ Mrs(x8, NZCV);
3029 __ Ptest(p1, p4.VnB());
3030 __ Mrs(x9, NZCV);
3031
3032 // Combinations of other inputs.
3033 __ Ptest(p2, p2.VnB());
3034 __ Mrs(x20, NZCV);
3035 __ Ptest(p2, p3.VnB());
3036 __ Mrs(x21, NZCV);
3037 __ Ptest(p2, p4.VnB());
3038 __ Mrs(x22, NZCV);
3039 __ Ptest(p3, p2.VnB());
3040 __ Mrs(x23, NZCV);
3041 __ Ptest(p3, p3.VnB());
3042 __ Mrs(x24, NZCV);
3043 __ Ptest(p3, p4.VnB());
3044 __ Mrs(x25, NZCV);
3045 __ Ptest(p4, p2.VnB());
3046 __ Mrs(x26, NZCV);
3047 __ Ptest(p4, p3.VnB());
3048 __ Mrs(x27, NZCV);
3049 __ Ptest(p4, p4.VnB());
3050 __ Mrs(x28, NZCV);
3051
3052 END();
3053
3054 if (CAN_RUN()) {
3055 RUN();
3056
3057 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
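    // For `ptest`, N (SVEFirstFlag) is set if the first active lane of pn is
    // true, Z (SVENoneFlag) if no active lane is true, and C (SVENotLastFlag)
    // if the last active lane is false; V is always cleared.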
3058
3059 // If pg is all inactive, the value of pn is irrelevant.
3060 ASSERT_EQUAL_64(zero, x0);
3061 ASSERT_EQUAL_64(zero, x1);
3062 ASSERT_EQUAL_64(zero, x2);
3063 ASSERT_EQUAL_64(zero, x3);
3064 ASSERT_EQUAL_64(zero, x4);
3065
3066 // All-active pg.
3067 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3068 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3069 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3070 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3071 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3072 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3073
3074 // Other inputs.
3075 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3076 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3077 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3078 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3079 x23); // pg: in3, pn: in2
3080 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3081 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3082 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3083 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3084 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3085 }
3086}
3087
3088TEST_SVE(sve_cntp) {
3089 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3090 START();
3091
3092 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3093 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3094 Initialise(&masm, p0.VnB(), p0_inputs);
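  // For H, S and D lanes, only the predicate bit of each lane's
  // lowest-numbered byte is significant, so the counts above are the number
  // of 1s at every second, fourth and eighth position, starting from the
  // rightmost value.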
3095
3096 // With an all-true predicate, these instructions measure the vector length.
3097 __ Ptrue(p10.VnB());
3098 __ Ptrue(p11.VnH());
3099 __ Ptrue(p12.VnS());
3100 __ Ptrue(p13.VnD());
3101
3102 // `ptrue p10.b` provides an all-active pg.
3103 __ Cntp(x10, p10, p10.VnB());
3104 __ Cntp(x11, p10, p11.VnH());
3105 __ Cntp(x12, p10, p12.VnS());
3106 __ Cntp(x13, p10, p13.VnD());
3107
3108 // Check that the predicate mask is applied properly.
3109 __ Cntp(x14, p10, p10.VnB());
3110 __ Cntp(x15, p11, p10.VnB());
3111 __ Cntp(x16, p12, p10.VnB());
3112 __ Cntp(x17, p13, p10.VnB());
3113
3114 // Check other patterns (including some ignored bits).
3115 __ Cntp(x0, p10, p0.VnB());
3116 __ Cntp(x1, p10, p0.VnH());
3117 __ Cntp(x2, p10, p0.VnS());
3118 __ Cntp(x3, p10, p0.VnD());
3119 __ Cntp(x4, p0, p10.VnB());
3120 __ Cntp(x5, p0, p10.VnH());
3121 __ Cntp(x6, p0, p10.VnS());
3122 __ Cntp(x7, p0, p10.VnD());
3123
3124 END();
3125
3126 if (CAN_RUN()) {
3127 RUN();
3128
3129 int vl_b = core.GetSVELaneCount(kBRegSize);
3130 int vl_h = core.GetSVELaneCount(kHRegSize);
3131 int vl_s = core.GetSVELaneCount(kSRegSize);
3132 int vl_d = core.GetSVELaneCount(kDRegSize);
3133
3134 // Check all-active predicates in various combinations.
3135 ASSERT_EQUAL_64(vl_b, x10);
3136 ASSERT_EQUAL_64(vl_h, x11);
3137 ASSERT_EQUAL_64(vl_s, x12);
3138 ASSERT_EQUAL_64(vl_d, x13);
3139
3140 ASSERT_EQUAL_64(vl_b, x14);
3141 ASSERT_EQUAL_64(vl_h, x15);
3142 ASSERT_EQUAL_64(vl_s, x16);
3143 ASSERT_EQUAL_64(vl_d, x17);
3144
3145 // Check that irrelevant bits are properly ignored.
3146 ASSERT_EQUAL_64(7, x0);
3147 ASSERT_EQUAL_64(5, x1);
3148 ASSERT_EQUAL_64(2, x2);
3149 ASSERT_EQUAL_64(1, x3);
3150
3151 ASSERT_EQUAL_64(7, x4);
3152 ASSERT_EQUAL_64(5, x5);
3153 ASSERT_EQUAL_64(2, x6);
3154 ASSERT_EQUAL_64(1, x7);
3155 }
3156}
3157
3158typedef void (MacroAssembler::*IntBinArithFn)(const ZRegister& zd,
3159 const PRegisterM& pg,
3160 const ZRegister& zn,
3161 const ZRegister& zm);
3162
3163template <typename Td, typename Tg, typename Tn>
3164static void IntBinArithHelper(Test* config,
3165 IntBinArithFn macro,
3166 unsigned lane_size_in_bits,
3167 const Tg& pg_inputs,
3168 const Tn& zn_inputs,
3169 const Tn& zm_inputs,
3170 const Td& zd_expected) {
3171 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3172 START();
3173
3174 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3175 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3176 InsrHelper(&masm, src_a, zn_inputs);
3177 InsrHelper(&masm, src_b, zm_inputs);
3178
3179 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3180
3181 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3182 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3183 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3184
3185 // `instr` zd(dst), zd(src_a), zn(src_b)
3186 __ Mov(zd_1, src_a);
3187 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3188
3189 // `instr` zd(dst), zm(src_a), zd(src_b)
3190 // If zd aliases zm, the `Instr` macro swaps the operand order when the
3191 // operation is commutative; otherwise it falls back to the reverse-operand
3192 // form of the instruction, such as subr or divr.
3193 __ Mov(zd_2, src_b);
3194 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
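  // For example, `Sub(z1, pg, z31, z1)` is expected to become
  // `subr z1.?, pg/m, z1.?, z31.?` (illustrative; the exact expansion is up
  // to the MacroAssembler).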
3195
3196 // `instr` zd(dst), zm(src_a), zn(src_b)
3197 // The `Instr` macro automatically selects between `instr` and
3198 // movprfx + `instr`, based on whether the zd and zn registers are aliased.
3199 // A generated movprfx instruction is predicated, using the same governing
3200 // predicate register. In order to keep the result predictable, initialise
3201 // the destination register first.
3202 __ Mov(zd_3, src_a);
3203 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
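  // For example, `Add(z2, pg, z31, z27)` may expand to something like
  // `movprfx z2.?, pg/m, z31.?` followed by `add z2.?, pg/m, z2.?, z27.?`
  // (a sketch; the MacroAssembler chooses the actual sequence).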
3204
3205 END();
3206
3207 if (CAN_RUN()) {
3208 RUN();
3209 ASSERT_EQUAL_SVE(zd_expected, zd_1);
3210
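    // zd_2 was initialised from src_b (the zm operand), so lanes where pg is
    // inactive should still hold the zm values; check its lanes individually.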
3211 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
3212 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
3213 if (!core.HasSVELane(zd_2, lane)) break;
3214 if (pg_inputs[i] == 1) {
3215 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_2, lane);
3216 } else {
3217 ASSERT_EQUAL_SVE_LANE(zm_inputs[i], zd_2, lane);
3218 }
3219 }
3220
3221 ASSERT_EQUAL_SVE(zd_expected, zd_3);
3222 }
3223}
3224
3225TEST_SVE(sve_binary_arithmetic_predicated_add) {
3226 // clang-format off
3227 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
3228
3229 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
3230
3231 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
3232
3233 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
3234
3235 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
3236 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
3237
3238 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
3239 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
3240
3241 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
3242 0x1010101010101010, 0x8181818181818181,
3243 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
3244 0x0101010101010101, 0x7f7f7f7fffffffff};
3245
3246 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
3247 0x1010101010101010, 0x0000000000000000,
3248 0x8181818181818181, 0x8080808080808080,
3249 0xffffffffffffffff, 0xffffffffffffffff};
3250
3251 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3252 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3253 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3254 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3255
3256 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
3257
3258 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
3259 0x8180, 0x8f8f, 0x0101, 0x7f7e};
3260
3261 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
3262 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
3263
3264 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
3265 0x2020202020202020, 0x8181818181818181,
3266 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
3267 0x0101010101010100, 0x7f7f7f7ffffffffe};
3268
3269 IntBinArithFn fn = &MacroAssembler::Add;
3270 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
3271 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
3272 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
3273 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
3274
3275 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
3276
3277 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
3278 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
3279
3280 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
3281 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
3282
3283 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
3284 0x0000000000000000, 0x8181818181818181,
3285 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
3286 0x0101010101010102, 0x7f7f7f8000000000};
3287
3288 fn = &MacroAssembler::Sub;
3289 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
3290 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
3291 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
3292 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
3293 // clang-format on
3294}
3295
3296TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
3297 // clang-format off
3298 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
3299
3300 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
3301
3302 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
3303 0xff00, 0xba98, 0x5555, 0x4567};
3304
3305 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
3306 0xfe00, 0xabab, 0xcdcd, 0x5678};
3307
3308 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
3309 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
3310
3311 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
3312 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
3313
3314 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
3315 0x5555555555555555, 0x0000000001234567};
3316
3317 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
3318 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3319
3320 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3321 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3322 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3323 int pg_d[] = {1, 0, 1, 1};
3324
3325 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
3326
3327 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
3328 0xff00, 0xba98, 0x5555, 0x5678};
3329
3330 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
3331 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
3332
3333 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3334 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3335
3336 IntBinArithFn fn = &MacroAssembler::Umax;
3337 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
3338 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
3339 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
3340 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
3341
3342 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
3343
3344 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
3345 0xfe00, 0xabab, 0x5555, 0x4567};
3346
3347 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
3348 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
3349
3350 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
3351 0x5555555555555555, 0x0000000001234567};
3352 fn = &MacroAssembler::Umin;
3353 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
3354 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
3355 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
3356 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
3357
3358 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
3359
3360 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
3361 0x0100, 0x0eed, 0x5555, 0x1111};
3362
3363 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
3364 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
3365
3366 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3367 0x7878787878787878, 0x0000000011111111};
3368
3369 fn = &MacroAssembler::Uabd;
3370 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
3371 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
3372 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
3373 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
3374 // clang-format on
3375}
3376
3377TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
3378 // clang-format off
3379 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
3380
3381 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
3382
3383 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
3384 INT16_MIN, INT16_MAX, INT16_MAX, 1};
3385
3386 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
3387 INT16_MAX, INT16_MAX - 1, -1, 0};
3388
3389 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
3390 INT32_MIN, INT32_MAX, INT32_MAX, 1};
3391
3392 int zm_s[] = {-1, 0, -1, -INT32_MAX,
3393 INT32_MAX, INT32_MAX - 1, -1, 0};
3394
3395 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3396 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3397
3398 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
3399 INT64_MAX, INT64_MAX - 1, -1, 0};
3400
3401 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3402 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3403 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3404 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3405
3406 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
3407
3408 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
3409 INT16_MAX, INT16_MAX, INT16_MAX, 1};
3410
3411 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
3412 INT32_MAX, INT32_MAX, INT32_MAX, 1};
3413
3414 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
3415 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3416
3417 IntBinArithFn fn = &MacroAssembler::Smax;
3418 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
3419 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
3420 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
3421 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
3422
3423 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
3424
3425 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
3426 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
3427
3428 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
3429 INT32_MIN, INT32_MAX, -1, 0};
3430
3431 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3432 INT64_MIN, INT64_MAX - 1, -1, 0};
3433
3434 fn = &MacroAssembler::Smin;
3435 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
3436 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
3437 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
3438 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
3439
3440 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
3441
3442 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
3443
3444 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
3445 0xffffffff, 0x7fffffff, 0x80000000, 1};
3446
3447 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
3448 0x8000000000000000, 1, 0x8000000000000000, 1};
3449
3450 fn = &MacroAssembler::Sabd;
3451 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
3452 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
3453 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
3454 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
3455 // clang-format on
3456}
3457
3458TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
3459 // clang-format off
3460 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3461
3462 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3463
3464 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
3465 0x8000, 0xff00, 0x5555, 0xaaaa};
3466
3467 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
3468 0x5555, 0xaaaa, 0x0001, 0x1234};
3469
3470 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3471 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
3472
3473 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3474 0x12345678, 0x22223333, 0x55556666, 0x77778888};
3475
3476 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
3477 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
3478
3479 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
3480 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
3481
3482 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3483 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3484 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3485 int pg_d[] = {1, 1, 0, 1};
3486
3487 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
3488
3489 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
3490 0x8000, 0xff00, 0x5555, 0x9e88};
3491
3492 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
3493 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
3494
3495 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
3496 0xffffffffffffffff, 0x38e38e38e38e38e4};
3497
3498 IntBinArithFn fn = &MacroAssembler::Mul;
3499 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
3500 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
3501 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
3502 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
3503
3504 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
3505
3506 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
3507 0x2aaa, 0xff00, 0x0000, 0x0c22};
3508
3509 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
3510 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
3511
3512 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
3513 0xffffffffffffffff, 0x71c71c71c71c71c6};
3514
3515 fn = &MacroAssembler::Umulh;
3516 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
3517 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
3518 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
3519 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
3520 // clang-format on
3521}
3522
3523TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
3524 // clang-format off
3525 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
3526
3527 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
3528
3529 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
3530
3531 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
3532
3533 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
3534
3535 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
3536
3537 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
3538
3539 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
3540
3541 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3542 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3543 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3544 int pg_d[] = {1, 1, 0, 1};
3545
3546 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
3547
3548 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
3549
3550 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
3551
3552 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
3553
3554 IntBinArithFn fn = &MacroAssembler::Smulh;
3555 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
3556 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
3557 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3558 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3559 // clang-format on
3560}
3561
3562TEST_SVE(sve_binary_arithmetic_predicated_logical) {
3563 // clang-format off
3564 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3565 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3566
3567 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
3568 0x8000, 0xffff, 0x5555, 0xaaaa};
3569 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
3570 0x5555, 0xaaaa, 0x0000, 0x0800};
3571
3572 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
3573 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
3574
3575 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
3576 0x0001200880ff55aa, 0x0022446688aaccee};
3577 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
3578 0x7fcd80ff55aa0008, 0x1133557799bbddff};
3579
3580 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3581 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3582 int pg_s[] = {1, 1, 1, 0};
3583 int pg_d[] = {1, 1, 0, 1};
3584
3585 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
3586
3587 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
3588 0x0000, 0xffff, 0x0000, 0x0800};
3589
3590 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
3591
3592 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
3593 0x0001200880ff55aa, 0x0022446688aaccee};
3594
3595 IntBinArithFn fn = &MacroAssembler::And;
3596 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
3597 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
3598 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
3599 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
3600
3601 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
3602
3603 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
3604 0x8000, 0xffff, 0x5555, 0xa2aa};
3605
3606 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
3607
3608 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
3609 0x0001200880ff55aa, 0x0000000000000000};
3610
3611 fn = &MacroAssembler::Bic;
3612 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
3613 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
3614 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
3615 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
3616
3617 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
3618
3619 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
3620 0xd555, 0xffff, 0x5555, 0xa2aa};
3621
3622 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
3623
3624 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
3625 0x0001200880ff55aa, 0x1111111111111111};
3626
3627 fn = &MacroAssembler::Eor;
3628 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
3629 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
3630 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
3631 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
3632
3633 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
3634
3635 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
3636 0xd555, 0xffff, 0x5555, 0xaaaa};
3637
3638 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
3639
3640 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
3641 0x0001200880ff55aa, 0x1133557799bbddff};
3642
3643 fn = &MacroAssembler::Orr;
3644 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
3645 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
3646 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
3647 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
3648 // clang-format on
3649}
3650
3651TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
3652 // clang-format off
3653 int zn_s[] = {0, 1, -1, 2468,
3654 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
3655 -11111111, 87654321, 0, 0};
3656
3657 int zm_s[] = {1, -1, 1, 1234,
3658 -1, INT32_MIN, 1, -1,
3659 22222222, 80000000, -1, 0};
3660
3661 int64_t zn_d[] = {0, 1, -1, 2468,
3662 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
3663 -11111111, 87654321, 0, 0};
3664
3665 int64_t zm_d[] = {1, -1, 1, 1234,
3666 -1, INT64_MIN, 1, -1,
3667 22222222, 80000000, -1, 0};
3668
3669 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
3670 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
3671
3672 int exp_s[] = {0, 1, -1, 2,
3673 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
3674 0, 1, 0, 0};
3675
3676 int64_t exp_d[] = {0, -1, -1, 2,
3677 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
3678 0, 1, 0, 0};
3679
3680 IntBinArithFn fn = &MacroAssembler::Sdiv;
3681 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3682 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3683 // clang-format on
3684}
3685
3686TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
3687 // clang-format off
3688 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
3689 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
3690
3691 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
3692 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
3693
3694 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
3695 0xffffffffffffffff, 0x8000000000000000,
3696 0xffffffffffffffff, 0x8000000000000000,
3697 0xffffffffffffffff, 0xf0000000f0000000};
3698
3699 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
3700 0x8000000000000000, 0x0000000000000002,
3701 0x8888888888888888, 0x0000000000000001,
3702 0x0000000080000000, 0x00000000f0000000};
3703
3704 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
3705 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
3706
3707 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
3708 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
3709
3710 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
3711 0x0000000000000001, 0x4000000000000000,
3712 0x0000000000000001, 0x8000000000000000,
3713 0xffffffffffffffff, 0x0000000100000001};
3714
3715 IntBinArithFn fn = &MacroAssembler::Udiv;
3716 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3717 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3718 // clang-format on
3719}
3720
3721typedef void (MacroAssembler::*IntArithFn)(const ZRegister& zd,
3722 const ZRegister& zn,
3723 const ZRegister& zm);
3724
3725template <typename T>
3726static void IntArithHelper(Test* config,
3727 IntArithFn macro,
3728 unsigned lane_size_in_bits,
3729 const T& zn_inputs,
3730 const T& zm_inputs,
3731 const T& zd_expected) {
3732 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3733 START();
3734
3735 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
3736 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
3737 InsrHelper(&masm, zn, zn_inputs);
3738 InsrHelper(&masm, zm, zm_inputs);
3739
3740 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
3741 (masm.*macro)(zd, zn, zm);
3742
3743 END();
3744
3745 if (CAN_RUN()) {
3746 RUN();
3747 ASSERT_EQUAL_SVE(zd_expected, zd);
3748 }
3749}
3750
3751TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
3752 // clang-format off
3753 unsigned ins_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
3754 unsigned ins_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
3755 unsigned ins_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
3756 uint64_t ins_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
3757 0x1000000010001010, 0xf0000000f000f0f0};
3758
3759 IntArithFn fn = &MacroAssembler::Add;
3760
3761 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
3762 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
3763 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
3764 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
3765 0x2000000020002020, 0xe0000001e001e1e0};
3766
3767 IntArithHelper(config, fn, kBRegSize, ins_b, ins_b, add_exp_b);
3768 IntArithHelper(config, fn, kHRegSize, ins_h, ins_h, add_exp_h);
3769 IntArithHelper(config, fn, kSRegSize, ins_s, ins_s, add_exp_s);
3770 IntArithHelper(config, fn, kDRegSize, ins_d, ins_d, add_exp_d);
3771
3772 fn = &MacroAssembler::Sqadd;
3773
3774 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
3775 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
3776 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
3777 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3778 0x2000000020002020, 0xe0000001e001e1e0};
3779
3780 IntArithHelper(config, fn, kBRegSize, ins_b, ins_b, sqadd_exp_b);
3781 IntArithHelper(config, fn, kHRegSize, ins_h, ins_h, sqadd_exp_h);
3782 IntArithHelper(config, fn, kSRegSize, ins_s, ins_s, sqadd_exp_s);
3783 IntArithHelper(config, fn, kDRegSize, ins_d, ins_d, sqadd_exp_d);
3784
3785 fn = &MacroAssembler::Uqadd;
3786
3787 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
3788 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
3789 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
3790 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
3791 0x2000000020002020, 0xffffffffffffffff};
3792
3793 IntArithHelper(config, fn, kBRegSize, ins_b, ins_b, uqadd_exp_b);
3794 IntArithHelper(config, fn, kHRegSize, ins_h, ins_h, uqadd_exp_h);
3795 IntArithHelper(config, fn, kSRegSize, ins_s, ins_s, uqadd_exp_s);
3796 IntArithHelper(config, fn, kDRegSize, ins_d, ins_d, uqadd_exp_d);
3797 // clang-format on
3798}
3799
3800TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
3801 // clang-format off
3802
3803 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
3804 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
3805
3806 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
3807 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
3808
3809 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
3810 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
3811
3812 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
3813 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
3814 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
3815 0xf0000000f000f0f0, 0x5555555555555555};
3816
3817 IntArithFn fn = &MacroAssembler::Sub;
3818
3819 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
3820 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
3821 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
3822 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
3823 0x8eeeeeed8eed8d8e, 0x5555555555555555};
3824
3825 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
3826 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
3827 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
3828 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
3829
3830 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
3831 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
3832 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
3833 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
3834 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
3835
3836 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
3837 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
3838 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
3839 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
3840
3841 fn = &MacroAssembler::Sqsub;
3842
3843 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
3844 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
3845 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
3846 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3847 0x7fffffffffffffff, 0x8000000000000000};
3848
3849 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
3850 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
3851 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
3852 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
3853
3854 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
3855 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
3856 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
3857 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
3858 0x8000000000000000, 0x7fffffffffffffff};
3859
3860 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
3861 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
3862 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
3863 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
3864
3865 fn = &MacroAssembler::Uqsub;
3866
3867 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
3868 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
3869 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
3870 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
3871 0x0000000000000000, 0x5555555555555555};
3872
3873 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
3874 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
3875 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
3876 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
3877
3878 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
3879 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
3880 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
3881 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
3882 0x7111111271127272, 0x0000000000000000};
3883
3884 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
3885 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
3886 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
3887 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
3888 // clang-format on
3889}
3890
3891TEST_SVE(sve_rdvl) {
3892 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3893 START();
3894
3895 // Encodable multipliers.
3896 __ Rdvl(x0, 0);
3897 __ Rdvl(x1, 1);
3898 __ Rdvl(x2, 2);
3899 __ Rdvl(x3, 31);
3900 __ Rdvl(x4, -1);
3901 __ Rdvl(x5, -2);
3902 __ Rdvl(x6, -32);
3903
3904 // For unencodable multipliers, the MacroAssembler uses a sequence of
3905 // instructions.
3906 __ Rdvl(x10, 32);
3907 __ Rdvl(x11, -33);
3908 __ Rdvl(x12, 42);
3909 __ Rdvl(x13, -42);
3910
3911 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3912 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3913 // occurs in the macro.
3914 __ Rdvl(x14, 0x007fffffffffffff);
3915 __ Rdvl(x15, -0x0080000000000000);
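  // For example, with VL = 256, 0x007fffffffffffff * 256 = 0x7fffffffffffff00,
  // which is still representable as a positive int64_t.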
3916
3917 END();
3918
3919 if (CAN_RUN()) {
3920 RUN();
3921
3922 uint64_t vl = config->sve_vl_in_bytes();
3923
3924 ASSERT_EQUAL_64(vl * 0, x0);
3925 ASSERT_EQUAL_64(vl * 1, x1);
3926 ASSERT_EQUAL_64(vl * 2, x2);
3927 ASSERT_EQUAL_64(vl * 31, x3);
3928 ASSERT_EQUAL_64(vl * -1, x4);
3929 ASSERT_EQUAL_64(vl * -2, x5);
3930 ASSERT_EQUAL_64(vl * -32, x6);
3931
3932 ASSERT_EQUAL_64(vl * 32, x10);
3933 ASSERT_EQUAL_64(vl * -33, x11);
3934 ASSERT_EQUAL_64(vl * 42, x12);
3935 ASSERT_EQUAL_64(vl * -42, x13);
3936
3937 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
3938 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
3939 }
3940}
3941
3942TEST_SVE(sve_rdpl) {
3943 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3944 START();
3945
3946 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
3947 // Addpl(xd, xzr, ...).
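  // A predicate register has one bit per Z-register byte, so PL = VL / 8;
  // with a 2048-bit vector, for example, VL is 256 bytes and PL is 32 bytes.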
3948
3949 // Encodable multipliers (as `addvl`).
3950 __ Rdpl(x0, 0);
3951 __ Rdpl(x1, 8);
3952 __ Rdpl(x2, 248);
3953 __ Rdpl(x3, -8);
3954 __ Rdpl(x4, -256);
3955
3956 // Encodable multipliers (as `movz` + `addpl`).
3957 __ Rdpl(x7, 31);
3958 __ Rdpl(x8, -32);
3959
3960 // For unencodable multipliers, the MacroAssembler uses a sequence of
3961 // instructions.
3962 __ Rdpl(x10, 42);
3963 __ Rdpl(x11, -42);
3964
3965 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3966 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3967 // occurs in the macro.
3968 __ Rdpl(x12, 0x007fffffffffffff);
3969 __ Rdpl(x13, -0x0080000000000000);
3970
3971 END();
3972
3973 if (CAN_RUN()) {
3974 RUN();
3975
3976 uint64_t vl = config->sve_vl_in_bytes();
3977 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
3978 uint64_t pl = vl / kZRegBitsPerPRegBit;
3979
3980 ASSERT_EQUAL_64(pl * 0, x0);
3981 ASSERT_EQUAL_64(pl * 8, x1);
3982 ASSERT_EQUAL_64(pl * 248, x2);
3983 ASSERT_EQUAL_64(pl * -8, x3);
3984 ASSERT_EQUAL_64(pl * -256, x4);
3985
3986 ASSERT_EQUAL_64(pl * 31, x7);
3987 ASSERT_EQUAL_64(pl * -32, x8);
3988
3989 ASSERT_EQUAL_64(pl * 42, x10);
3990 ASSERT_EQUAL_64(pl * -42, x11);
3991
3992 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
3993 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
3994 }
3995}
3996
3997TEST_SVE(sve_addvl) {
3998 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3999 START();
4000
4001 uint64_t base = 0x1234567800000000;
4002 __ Mov(x30, base);
4003
4004 // Encodable multipliers.
4005 __ Addvl(x0, x30, 0);
4006 __ Addvl(x1, x30, 1);
4007 __ Addvl(x2, x30, 31);
4008 __ Addvl(x3, x30, -1);
4009 __ Addvl(x4, x30, -32);
4010
4011 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4012 __ Addvl(x5, x30, 32);
4013 __ Addvl(x6, x30, -33);
4014
4015 // Test the limits of the multiplier supported by the `Rdvl` macro.
4016 __ Addvl(x7, x30, 0x007fffffffffffff);
4017 __ Addvl(x8, x30, -0x0080000000000000);
4018
4019 // Check that xzr behaves correctly.
4020 __ Addvl(x9, xzr, 8);
4021 __ Addvl(x10, xzr, 42);
4022
4023 // Check that sp behaves correctly with encodable and unencodable multipliers.
4024 __ Addvl(sp, sp, -5);
4025 __ Addvl(sp, sp, -37);
4026 __ Addvl(x11, sp, -2);
4027 __ Addvl(sp, x11, 2);
4028 __ Addvl(x12, sp, -42);
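  // At this point sp has moved down by (5 + 37) = 42 VL, x11 is 44 VL below
  // the original sp and x12 is 84 VL below it; the adjustments below convert
  // these into the positive offsets checked at the end of the test.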
4029
4030 // Restore the value of sp.
4031 __ Addvl(sp, x11, 39);
4032 __ Addvl(sp, sp, 5);
4033
4034 // Adjust x11 and x12 to make the test sp-agnostic.
4035 __ Sub(x11, sp, x11);
4036 __ Sub(x12, sp, x12);
4037
4038 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4039 __ Mov(x20, x30);
4040 __ Mov(x21, x30);
4041 __ Mov(x22, x30);
4042 __ Addvl(x20, x20, 4);
4043 __ Addvl(x21, x21, 42);
4044 __ Addvl(x22, x22, -0x0080000000000000);
4045
4046 END();
4047
4048 if (CAN_RUN()) {
4049 RUN();
4050
4051 uint64_t vl = config->sve_vl_in_bytes();
4052
4053 ASSERT_EQUAL_64(base + (vl * 0), x0);
4054 ASSERT_EQUAL_64(base + (vl * 1), x1);
4055 ASSERT_EQUAL_64(base + (vl * 31), x2);
4056 ASSERT_EQUAL_64(base + (vl * -1), x3);
4057 ASSERT_EQUAL_64(base + (vl * -32), x4);
4058
4059 ASSERT_EQUAL_64(base + (vl * 32), x5);
4060 ASSERT_EQUAL_64(base + (vl * -33), x6);
4061
4062 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4063 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4064
4065 ASSERT_EQUAL_64(vl * 8, x9);
4066 ASSERT_EQUAL_64(vl * 42, x10);
4067
4068 ASSERT_EQUAL_64(vl * 44, x11);
4069 ASSERT_EQUAL_64(vl * 84, x12);
4070
4071 ASSERT_EQUAL_64(base + (vl * 4), x20);
4072 ASSERT_EQUAL_64(base + (vl * 42), x21);
4073 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4074
4075 ASSERT_EQUAL_64(base, x30);
4076 }
4077}
4078
4079TEST_SVE(sve_addpl) {
4080 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4081 START();
4082
4083 uint64_t base = 0x1234567800000000;
4084 __ Mov(x30, base);
4085
4086 // Encodable multipliers.
4087 __ Addpl(x0, x30, 0);
4088 __ Addpl(x1, x30, 1);
4089 __ Addpl(x2, x30, 31);
4090 __ Addpl(x3, x30, -1);
4091 __ Addpl(x4, x30, -32);
4092
4093 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4094 // it falls back to `Rdvl` and `Add`.
4095 __ Addpl(x5, x30, 32);
4096 __ Addpl(x6, x30, -33);
4097
4098 // Test the limits of the multiplier supported by the `Rdvl` macro.
4099 __ Addpl(x7, x30, 0x007fffffffffffff);
4100 __ Addpl(x8, x30, -0x0080000000000000);
4101
4102 // Check that xzr behaves correctly.
4103 __ Addpl(x9, xzr, 8);
4104 __ Addpl(x10, xzr, 42);
4105
4106 // Check that sp behaves correctly with encodable and unencodable multipliers.
4107 __ Addpl(sp, sp, -5);
4108 __ Addpl(sp, sp, -37);
4109 __ Addpl(x11, sp, -2);
4110 __ Addpl(sp, x11, 2);
4111 __ Addpl(x12, sp, -42);
4112
4113 // Restore the value of sp.
4114 __ Addpl(sp, x11, 39);
4115 __ Addpl(sp, sp, 5);
4116
4117 // Adjust x11 and x12 to make the test sp-agnostic.
4118 __ Sub(x11, sp, x11);
4119 __ Sub(x12, sp, x12);
4120
4121 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4122 __ Mov(x20, x30);
4123 __ Mov(x21, x30);
4124 __ Mov(x22, x30);
4125 __ Addpl(x20, x20, 4);
4126 __ Addpl(x21, x21, 42);
4127 __ Addpl(x22, x22, -0x0080000000000000);
4128
4129 END();
4130
4131 if (CAN_RUN()) {
4132 RUN();
4133
4134 uint64_t vl = config->sve_vl_in_bytes();
4135 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4136 uint64_t pl = vl / kZRegBitsPerPRegBit;
4137
4138 ASSERT_EQUAL_64(base + (pl * 0), x0);
4139 ASSERT_EQUAL_64(base + (pl * 1), x1);
4140 ASSERT_EQUAL_64(base + (pl * 31), x2);
4141 ASSERT_EQUAL_64(base + (pl * -1), x3);
4142 ASSERT_EQUAL_64(base + (pl * -32), x4);
4143
4144 ASSERT_EQUAL_64(base + (pl * 32), x5);
4145 ASSERT_EQUAL_64(base + (pl * -33), x6);
4146
4147 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4148 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4149
4150 ASSERT_EQUAL_64(pl * 8, x9);
4151 ASSERT_EQUAL_64(pl * 42, x10);
4152
4153 ASSERT_EQUAL_64(pl * 44, x11);
4154 ASSERT_EQUAL_64(pl * 84, x12);
4155
4156 ASSERT_EQUAL_64(base + (pl * 4), x20);
4157 ASSERT_EQUAL_64(base + (pl * 42), x21);
4158 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4159
4160 ASSERT_EQUAL_64(base, x30);
4161 }
4162}
4163
4164TEST_SVE(sve_adr_x) {
4165 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4166 START();
4167
4168 uint64_t base = 0x1234567800000000;
4169 __ Mov(x28, base);
4170 __ Mov(x29, 48);
4171 __ Mov(x30, -48);
4172
4173 // Simple scalar (or equivalent) cases.
4174
4175 __ Adr(x0, SVEMemOperand(x28));
4176 __ Adr(x1, SVEMemOperand(x28, 0));
4177 __ Adr(x2, SVEMemOperand(x28, 0, SVE_MUL_VL).ForZRegAccess());
4178 __ Adr(x3, SVEMemOperand(x28, 0, SVE_MUL_VL).ForPRegAccess());
4179 __ Adr(x4, SVEMemOperand(x28, xzr));
4180 __ Adr(x5, SVEMemOperand(x28, xzr, LSL, 42));
4181
4182 // scalar-plus-immediate
4183
4184 // Unscaled immediates, handled with `Add`.
4185 __ Adr(x6, SVEMemOperand(x28, 42));
4186 __ Adr(x7, SVEMemOperand(x28, -42));
4187 // Scaled immediates, handled with `Addvl` or `Addpl`.
4188 __ Adr(x8, SVEMemOperand(x28, 31, SVE_MUL_VL).ForZRegAccess());
4189 __ Adr(x9, SVEMemOperand(x28, -32, SVE_MUL_VL).ForZRegAccess());
4190 __ Adr(x10, SVEMemOperand(x28, 31, SVE_MUL_VL).ForPRegAccess());
4191 __ Adr(x11, SVEMemOperand(x28, -32, SVE_MUL_VL).ForPRegAccess());
4192 // Out of `addvl` or `addpl` range.
4193 __ Adr(x12, SVEMemOperand(x28, 42, SVE_MUL_VL).ForZRegAccess());
4194 __ Adr(x13, SVEMemOperand(x28, -42, SVE_MUL_VL).ForZRegAccess());
4195 __ Adr(x14, SVEMemOperand(x28, 42, SVE_MUL_VL).ForPRegAccess());
4196 __ Adr(x15, SVEMemOperand(x28, -42, SVE_MUL_VL).ForPRegAccess());
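  // With SVE_MUL_VL, the immediate offset is scaled by VL (in bytes) for
  // Z-register accesses and by PL for P-register accesses, as the expected
  // values below assume.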
4197
4198 // scalar-plus-scalar
4199
4200 __ Adr(x18, SVEMemOperand(x28, x29));
4201 __ Adr(x19, SVEMemOperand(x28, x30));
4202 __ Adr(x20, SVEMemOperand(x28, x29, LSL, 8));
4203 __ Adr(x21, SVEMemOperand(x28, x30, LSL, 8));
4204
4205 // In-place updates, to stress scratch register allocation.
4206
4207 __ Mov(x22, 0xabcd000000000000);
4208 __ Mov(x23, 0xabcd101100000000);
4209 __ Mov(x24, 0xabcd202200000000);
4210 __ Mov(x25, 0xabcd303300000000);
4211 __ Mov(x26, 0xabcd404400000000);
4212 __ Mov(x27, 0xabcd505500000000);
4213
4214 __ Adr(x22, SVEMemOperand(x22));
4215 __ Adr(x23, SVEMemOperand(x23, 0x42));
4216 __ Adr(x24, SVEMemOperand(x24, 3, SVE_MUL_VL).ForZRegAccess());
4217 __ Adr(x25, SVEMemOperand(x25, 0x42, SVE_MUL_VL).ForPRegAccess());
4218 __ Adr(x26, SVEMemOperand(x26, x29));
4219 __ Adr(x27, SVEMemOperand(x27, x30, LSL, 4));
4220
4221 END();
4222
4223 if (CAN_RUN()) {
4224 RUN();
4225
4226 uint64_t vl = config->sve_vl_in_bytes();
4227 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4228 uint64_t pl = vl / kZRegBitsPerPRegBit;
4229
4230 // Simple scalar (or equivalent) cases.
4231 ASSERT_EQUAL_64(base, x0);
4232 ASSERT_EQUAL_64(base, x1);
4233 ASSERT_EQUAL_64(base, x2);
4234 ASSERT_EQUAL_64(base, x3);
4235 ASSERT_EQUAL_64(base, x4);
4236 ASSERT_EQUAL_64(base, x5);
4237
4238 // scalar-plus-immediate
4239 ASSERT_EQUAL_64(base + 42, x6);
4240 ASSERT_EQUAL_64(base - 42, x7);
4241 ASSERT_EQUAL_64(base + (31 * vl), x8);
4242 ASSERT_EQUAL_64(base - (32 * vl), x9);
4243 ASSERT_EQUAL_64(base + (31 * pl), x10);
4244 ASSERT_EQUAL_64(base - (32 * pl), x11);
4245 ASSERT_EQUAL_64(base + (42 * vl), x12);
4246 ASSERT_EQUAL_64(base - (42 * vl), x13);
4247 ASSERT_EQUAL_64(base + (42 * pl), x14);
4248 ASSERT_EQUAL_64(base - (42 * pl), x15);
4249
4250 // scalar-plus-scalar
4251 ASSERT_EQUAL_64(base + 48, x18);
4252 ASSERT_EQUAL_64(base - 48, x19);
4253 ASSERT_EQUAL_64(base + (48 << 8), x20);
4254 ASSERT_EQUAL_64(base - (48 << 8), x21);
4255
4256 // In-place updates.
4257 ASSERT_EQUAL_64(0xabcd000000000000, x22);
4258 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x23);
4259 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x24);
4260 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x25);
4261 ASSERT_EQUAL_64(0xabcd404400000000 + 48, x26);
4262 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x27);
4263
4264 // Check that the inputs were unmodified.
4265 ASSERT_EQUAL_64(base, x28);
4266 ASSERT_EQUAL_64(48, x29);
4267 ASSERT_EQUAL_64(-48, x30);
4268 }
4269}
4270
4271TEST_SVE(sve_permute_vector_unpredicated) {
4272 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
4273 START();
4274
4275 __ Mov(x0, 0x0123456789abcdef);
4276 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
4277 __ Insr(z1.VnS(), w0);
4278 __ Insr(z2.VnD(), x0);
4279 __ Insr(z3.VnH(), h0);
4280 __ Insr(z4.VnD(), d0);
4281
4282 uint64_t inputs[] = {0xfedcba9876543210,
4283 0x0123456789abcdef,
4284 0x8f8e8d8c8b8a8988,
4285 0x8786858483828180};
4286
4287 // Initialise a distinguishable value throughout the register first.
4288 __ Dup(z9.VnB(), 0xff);
4289 InsrHelper(&masm, z9.VnD(), inputs);
4290
4291 __ Rev(z5.VnB(), z9.VnB());
4292 __ Rev(z6.VnH(), z9.VnH());
4293 __ Rev(z7.VnS(), z9.VnS());
4294 __ Rev(z8.VnD(), z9.VnD());
4295
4296 int index[7] = {22, 7, 7, 3, 1, 1, 63};
4297 // Broadcast a lane from within the input array.
4298 __ Dup(z10.VnB(), z9.VnB(), index[0]);
4299 __ Dup(z11.VnH(), z9.VnH(), index[1]);
4300 __ Dup(z12.VnS(), z9.VnS(), index[2]);
4301 __ Dup(z13.VnD(), z9.VnD(), index[3]);
4302 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
4303 // Test dst == src
4304 __ Mov(z15, z9);
4305 __ Dup(z15.VnS(), z15.VnS(), index[5]);
4306 // Select a lane beyond the input array.
4307 __ Dup(z16.VnB(), z9.VnB(), index[6]);
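  // If the index selects a lane beyond the current vector length, `dup`
  // (indexed) zeroes every lane of the destination; otherwise it broadcasts
  // the selected lane, as the expected values below assume.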
4308
4309 END();
4310
4311 if (CAN_RUN()) {
4312 RUN();
4313
4314 // Insr
4315 uint64_t z1_expected[] = {0x7f80f0017ff0f001, 0x7f80f00089abcdef};
4316 uint64_t z2_expected[] = {0x7ff0f0027f80f000, 0x0123456789abcdef};
4317 uint64_t z3_expected[] = {0xf0037f80f0017ff0, 0xf0037f80f0003456};
4318 uint64_t z4_expected[] = {0x7ff0f0047f80f000, 0x7ffaaaaa22223456};
4319 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
4320 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
4321 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
4322 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
4323
4324 // Rev
4325 int lane_count = core.GetSVELaneCount(kBRegSize);
4326 for (int i = 0; i < lane_count; i++) {
4327 uint64_t expected =
4328 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
4329 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
4330 ASSERT_EQUAL_64(expected, input);
4331 }
4332
4333 lane_count = core.GetSVELaneCount(kHRegSize);
4334 for (int i = 0; i < lane_count; i++) {
4335 uint64_t expected =
4336 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
4337 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
4338 ASSERT_EQUAL_64(expected, input);
4339 }
4340
4341 lane_count = core.GetSVELaneCount(kSRegSize);
4342 for (int i = 0; i < lane_count; i++) {
4343 uint64_t expected =
4344 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
4345 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
4346 ASSERT_EQUAL_64(expected, input);
4347 }
4348
4349 lane_count = core.GetSVELaneCount(kDRegSize);
4350 for (int i = 0; i < lane_count; i++) {
4351 uint64_t expected =
4352 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
4353 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
4354 ASSERT_EQUAL_64(expected, input);
4355 }
4356
4357 // Dup
4358 unsigned vl = config->sve_vl_in_bits();
4359 lane_count = core.GetSVELaneCount(kBRegSize);
4360 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
4361 for (int i = 0; i < lane_count; i++) {
4362 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
4363 }
4364
4365 lane_count = core.GetSVELaneCount(kHRegSize);
4366 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
4367 for (int i = 0; i < lane_count; i++) {
4368 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
4369 }
4370
4371 lane_count = core.GetSVELaneCount(kSRegSize);
4372 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
4373 for (int i = 0; i < lane_count; i++) {
4374 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
4375 }
4376
4377 lane_count = core.GetSVELaneCount(kDRegSize);
4378 uint64_t expected_z13 =
4379 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
4380 for (int i = 0; i < lane_count; i++) {
4381 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
4382 }
4383
4384 lane_count = core.GetSVELaneCount(kDRegSize);
4385 uint64_t expected_z14_lo = 0;
4386 uint64_t expected_z14_hi = 0;
4387 if (vl > (index[4] * kQRegSize)) {
4388 expected_z14_lo = 0x0123456789abcdef;
4389 expected_z14_hi = 0xfedcba9876543210;
4390 }
4391 for (int i = 0; i < lane_count; i += 2) {
4392 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
4393 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
4394 }
4395
4396 lane_count = core.GetSVELaneCount(kSRegSize);
4397 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
4398 for (int i = 0; i < lane_count; i++) {
4399 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
4400 }
4401
4402 lane_count = core.GetSVELaneCount(kBRegSize);
4403 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
4404 for (int i = 0; i < lane_count; i++) {
4405 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
4406 }
4407 }
4408}
4409
4410TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
4411 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4412 START();
4413
4414 uint64_t z9_inputs[] = {0xfedcba9876543210,
4415 0x0123456789abcdef,
4416 0x8f8e8d8c8b8a8988,
4417 0x8786858483828180};
4418 InsrHelper(&masm, z9.VnD(), z9_inputs);
4419
4420 __ Sunpkhi(z10.VnH(), z9.VnB());
4421 __ Sunpkhi(z11.VnS(), z9.VnH());
4422 __ Sunpkhi(z12.VnD(), z9.VnS());
4423
4424 __ Sunpklo(z13.VnH(), z9.VnB());
4425 __ Sunpklo(z14.VnS(), z9.VnH());
4426 __ Sunpklo(z15.VnD(), z9.VnS());
4427
4428 __ Uunpkhi(z16.VnH(), z9.VnB());
4429 __ Uunpkhi(z17.VnS(), z9.VnH());
4430 __ Uunpkhi(z18.VnD(), z9.VnS());
4431
4432 __ Uunpklo(z19.VnH(), z9.VnB());
4433 __ Uunpklo(z20.VnS(), z9.VnH());
4434 __ Uunpklo(z21.VnD(), z9.VnS());
4435
4436 END();
4437
4438 if (CAN_RUN()) {
4439 RUN();
4440
4441 // Sunpkhi
4442 int lane_count = core.GetSVELaneCount(kHRegSize);
4443 for (int i = lane_count - 1; i >= 0; i--) {
4444 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
4445 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4446 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4447 ASSERT_EQUAL_64(expected, input);
4448 }
4449
4450 lane_count = core.GetSVELaneCount(kSRegSize);
4451 for (int i = lane_count - 1; i >= 0; i--) {
4452 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
4453 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4454 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4455 ASSERT_EQUAL_64(expected, input);
4456 }
4457
4458 lane_count = core.GetSVELaneCount(kDRegSize);
4459 for (int i = lane_count - 1; i >= 0; i--) {
4460 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
4461 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4462 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4463 ASSERT_EQUAL_64(expected, input);
4464 }
4465
4466 // Sunpklo
4467 lane_count = core.GetSVELaneCount(kHRegSize);
4468 for (int i = lane_count - 1; i >= 0; i--) {
4469 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
4470 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4471 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4472 ASSERT_EQUAL_64(expected, input);
4473 }
4474
4475 lane_count = core.GetSVELaneCount(kSRegSize);
4476 for (int i = lane_count - 1; i >= 0; i--) {
4477 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
4478 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4479 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4480 ASSERT_EQUAL_64(expected, input);
4481 }
4482
4483 lane_count = core.GetSVELaneCount(kDRegSize);
4484 for (int i = lane_count - 1; i >= 0; i--) {
4485 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
4486 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4487 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4488 ASSERT_EQUAL_64(expected, input);
4489 }
4490
4491 // Uunpkhi
4492 lane_count = core.GetSVELaneCount(kHRegSize);
4493 for (int i = lane_count - 1; i >= 0; i--) {
4494 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
4495 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4496 ASSERT_EQUAL_64(expected, input);
4497 }
4498
4499 lane_count = core.GetSVELaneCount(kSRegSize);
4500 for (int i = lane_count - 1; i >= 0; i--) {
4501 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
4502 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4503 ASSERT_EQUAL_64(expected, input);
4504 }
4505
4506 lane_count = core.GetSVELaneCount(kDRegSize);
4507 for (int i = lane_count - 1; i >= 0; i--) {
4508 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
4509 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4510 ASSERT_EQUAL_64(expected, input);
4511 }
4512
4513 // Uunpklo
4514 lane_count = core.GetSVELaneCount(kHRegSize);
4515 for (int i = lane_count - 1; i >= 0; i--) {
4516 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
4517 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4518 ASSERT_EQUAL_64(expected, input);
4519 }
4520
4521 lane_count = core.GetSVELaneCount(kSRegSize);
4522 for (int i = lane_count - 1; i >= 0; i--) {
4523 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
4524 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4525 ASSERT_EQUAL_64(expected, input);
4526 }
4527
4528 lane_count = core.GetSVELaneCount(kDRegSize);
4529 for (int i = lane_count - 1; i >= 0; i--) {
4530 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
4531 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4532 ASSERT_EQUAL_64(expected, input);
4533 }
4534 }
4535}
4536
4537TEST_SVE(sve_cnot_not) {
4538 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4539 START();
4540
4541 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
4542
4543 // For simplicity, we re-use the same pg for various lane sizes.
4544 // For D lanes: 1, 1, 0
4545 // For S lanes: 1, 1, 1, 0, 0
4546 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4547 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
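  // For lane sizes wider than a byte, only the predicate bit that corresponds
  // to the lowest-addressed byte of each lane is significant. For example,
  // the three D lanes are governed by every eighth flag above (ending with
  // the last one), which gives the 1, 1, 0 listed in the comment.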
4548 Initialise(&masm, p0.VnB(), pg_in);
4549 PRegisterM pg = p0.Merging();
4550
4551 // These are merging operations, so we have to initialise the result register.
4552 // We use a mixture of constructive and destructive operations.
4553
4554 InsrHelper(&masm, z31.VnD(), in);
4555 // Make a copy so we can check that constructive operations preserve zn.
4556 __ Mov(z30, z31);
4557
4558 // For constructive operations, use a different initial result value.
4559 __ Index(z29.VnB(), 0, -1);
4560
4561 __ Mov(z0, z31);
4562 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
4563 __ Mov(z1, z29);
4564 __ Cnot(z1.VnH(), pg, z31.VnH());
4565 __ Mov(z2, z31);
4566 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
4567 __ Mov(z3, z29);
4568 __ Cnot(z3.VnD(), pg, z31.VnD());
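  // Cnot writes 1 to each active lane that is zero and 0 to every other
  // active lane, whereas Not (below) is a bitwise inversion. For example, an
  // active H lane holding 0x0000 becomes 0x0001 under Cnot but 0xffff under
  // Not; inactive lanes keep the destination's original value.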
4569
4570 __ Mov(z4, z29);
4571 __ Not(z4.VnB(), pg, z31.VnB());
4572 __ Mov(z5, z31);
4573 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
4574 __ Mov(z6, z29);
4575 __ Not(z6.VnS(), pg, z31.VnS());
4576 __ Mov(z7, z31);
4577 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
4578
4579 END();
4580
4581 if (CAN_RUN()) {
4582 RUN();
4583
4584 // Check that constructive operations preserve their inputs.
4585 ASSERT_EQUAL_SVE(z30, z31);
4586
4587 // clang-format off
4588
4589 // Cnot (B) destructive
4590 uint64_t expected_z0[] =
4591 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4592 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
4593 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4594
4595 // Cnot (H)
4596 uint64_t expected_z1[] =
4597 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4598 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
4599 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4600
4601 // Cnot (S) destructive
4602 uint64_t expected_z2[] =
4603 // pg: 0 1 1 1 0 0
4604 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
4605 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4606
4607 // Cnot (D)
4608 uint64_t expected_z3[] =
4609 // pg: 1 1 0
4610 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
4611 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4612
4613 // Not (B)
4614 uint64_t expected_z4[] =
4615 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4616 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
4617 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4618
4619 // Not (H) destructive
4620 uint64_t expected_z5[] =
4621 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4622 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
4623 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4624
4625 // Not (S)
4626 uint64_t expected_z6[] =
4627 // pg: 0 1 1 1 0 0
4628 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
4629 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
4630
4631 // Not (D) destructive
4632 uint64_t expected_z7[] =
4633 // pg: 1 1 0
4634 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
4635 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
4636
4637 // clang-format on
4638 }
4639}
4640
4641TEST_SVE(sve_fabs_fneg) {
4642 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4643 START();
4644
4645 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
4646 // NaNs, but fabs and fneg do not.
4647 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
4648 0xfff00000ff80fc01, // Signalling NaNs.
4649 0x123456789abcdef0};
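  // Fabs and Fneg only clear or invert the sign bit, so even signalling NaNs
  // pass through otherwise unchanged. For example, the D lane
  // 0xfff00000ff80fc01 becomes 0x7ff00000ff80fc01 under Fabs and is still a
  // signalling NaN.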
4650
4651 // For simplicity, we re-use the same pg for various lane sizes.
4652 // For D lanes: 1, 1, 0
4653 // For S lanes: 1, 1, 1, 0, 0
4654 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4655 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4656 Initialise(&masm, p0.VnB(), pg_in);
4657 PRegisterM pg = p0.Merging();
4658
4659 // These are merging operations, so we have to initialise the result register.
4660 // We use a mixture of constructive and destructive operations.
4661
4662 InsrHelper(&masm, z31.VnD(), in);
4663 // Make a copy so we can check that constructive operations preserve zn.
4664 __ Mov(z30, z31);
4665
4666 // For constructive operations, use a different initial result value.
4667 __ Index(z29.VnB(), 0, -1);
4668
4669 __ Mov(z0, z29);
4670 __ Fabs(z0.VnH(), pg, z31.VnH());
4671 __ Mov(z1, z31);
4672 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
4673 __ Mov(z2, z29);
4674 __ Fabs(z2.VnD(), pg, z31.VnD());
4675
4676 __ Mov(z3, z31);
4677 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
4678 __ Mov(z4, z29);
4679 __ Fneg(z4.VnS(), pg, z31.VnS());
4680 __ Mov(z5, z31);
4681 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
4682
4683 END();
4684
4685 if (CAN_RUN()) {
4686 RUN();
4687
4688 // Check that constructive operations preserve their inputs.
4689 ASSERT_EQUAL_SVE(z30, z31);
4690
4691 // clang-format off
4692
4693 // Fabs (H)
4694 uint64_t expected_z0[] =
4695 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4696 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
4697 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4698
4699 // Fabs (S) destructive
4700 uint64_t expected_z1[] =
4701 // pg: 0 1 1 1 0 0
4702 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
4703 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4704
4705 // Fabs (D)
4706 uint64_t expected_z2[] =
4707 // pg: 1 1 0
4708 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
4709 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4710
4711 // Fneg (H) destructive
4712 uint64_t expected_z3[] =
4713 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4714 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
4715 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4716
4717 // Fneg (S)
4718 uint64_t expected_z4[] =
4719 // pg: 0 1 1 1 0 0
4720 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
4721 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4722
4723 // Fneg (D) destructive
4724 uint64_t expected_z5[] =
4725 // pg: 1 1 0
4726 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
4727 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4728
4729 // clang-format on
4730 }
4731}
4732
4733TEST_SVE(sve_cls_clz_cnt) {
4734 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4735 START();
4736
4737 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
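  // As a reference for the expected values below: Cls counts consecutive
  // copies of the sign bit after the sign bit (e.g. the byte 0xfe gives 6),
  // Clz counts leading zeros (the H lane 0x0000 gives 16), and Cnt counts
  // set bits (the S lane 0xfefcf8f0 gives 22).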
4738
4739 // For simplicity, we re-use the same pg for various lane sizes.
4740 // For D lanes: 1, 1, 0
4741 // For S lanes: 1, 1, 1, 0, 0
4742 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4743 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4744 Initialise(&masm, p0.VnB(), pg_in);
4745 PRegisterM pg = p0.Merging();
4746
4747 // These are merging operations, so we have to initialise the result register.
4748 // We use a mixture of constructive and destructive operations.
4749
4750 InsrHelper(&masm, z31.VnD(), in);
4751 // Make a copy so we can check that constructive operations preserve zn.
4752 __ Mov(z30, z31);
4753
4754 // For constructive operations, use a different initial result value.
4755 __ Index(z29.VnB(), 0, -1);
4756
4757 __ Mov(z0, z29);
4758 __ Cls(z0.VnB(), pg, z31.VnB());
4759 __ Mov(z1, z31);
4760 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
4761 __ Mov(z2, z29);
4762 __ Cnt(z2.VnS(), pg, z31.VnS());
4763 __ Mov(z3, z31);
4764 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
4765
4766 END();
4767
4768 if (CAN_RUN()) {
4769 RUN();
4770 // Check that non-destructive operations preserve their inputs.
4771 ASSERT_EQUAL_SVE(z30, z31);
4772
4773 // clang-format off
4774
4775 // cls (B)
4776 uint8_t expected_z0[] =
4777 // pg: 0 0 0 0 1 0 1 1
4778 // pg: 1 0 0 1 0 1 1 1
4779 // pg: 0 0 1 0 1 1 1 0
4780 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
4781 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
4782 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
4783 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
4784
4785 // clz (H) destructive
4786 uint16_t expected_z1[] =
4787 // pg: 0 0 0 1
4788 // pg: 0 1 1 1
4789 // pg: 0 0 1 0
4790 {0x0000, 0x0000, 0x0000, 16,
4791 0xfefc, 0, 0, 0,
4792 0x1234, 0x5678, 0, 0xdef0};
4793 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
4794
4795 // cnt (S)
4796 uint32_t expected_z2[] =
4797 // pg: 0 1
4798 // pg: 1 1
4799 // pg: 0 0
4800 {0xe9eaebec, 0,
4801 22, 16,
4802 0xf9fafbfc, 0xfdfeff00};
4803 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
4804
4805 // cnt (D) destructive
4806 uint64_t expected_z3[] =
4807 // pg: 1 1 0
4808 { 0, 38, 0x123456789abcdef0};
4809 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4810
4811 // clang-format on
4812 }
4813}
4814
4815TEST_SVE(sve_sxt) {
4816 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4817 START();
4818
4819 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
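  // Sxtb, Sxth and Sxtw sign-extend the low 8, 16 or 32 bits of each active
  // lane. For example, the H lane 0x07f8 holds the negative byte 0xf8, so
  // Sxtb turns it into 0xfff8.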
4820
4821 // For simplicity, we re-use the same pg for various lane sizes.
4822 // For D lanes: 1, 1, 0
4823 // For S lanes: 1, 1, 1, 0, 0
4824 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4825 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4826 Initialise(&masm, p0.VnB(), pg_in);
4827 PRegisterM pg = p0.Merging();
4828
4829 // These are merging operations, so we have to initialise the result register.
4830 // We use a mixture of constructive and destructive operations.
4831
4832 InsrHelper(&masm, z31.VnD(), in);
4833 // Make a copy so we can check that constructive operations preserve zn.
4834 __ Mov(z30, z31);
4835
4836 // For constructive operations, use a different initial result value.
4837 __ Index(z29.VnB(), 0, -1);
4838
4839 __ Mov(z0, z31);
4840 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
4841 __ Mov(z1, z29);
4842 __ Sxtb(z1.VnS(), pg, z31.VnS());
4843 __ Mov(z2, z31);
4844 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
4845 __ Mov(z3, z29);
4846 __ Sxth(z3.VnS(), pg, z31.VnS());
4847 __ Mov(z4, z31);
4848 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
4849 __ Mov(z5, z29);
4850 __ Sxtw(z5.VnD(), pg, z31.VnD());
4851
4852 END();
4853
4854 if (CAN_RUN()) {
4855 RUN();
4856 // Check that constructive operations preserve their inputs.
4857 ASSERT_EQUAL_SVE(z30, z31);
4858
4859 // clang-format off
4860
4861 // Sxtb (H) destructive
4862 uint64_t expected_z0[] =
4863 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4864 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
4865 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4866
4867 // Sxtb (S)
4868 uint64_t expected_z1[] =
4869 // pg: 0 1 1 1 0 0
4870 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
4871 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4872
4873 // Sxtb (D) destructive
4874 uint64_t expected_z2[] =
4875 // pg: 1 1 0
4876 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
4877 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4878
4879 // Sxth (S)
4880 uint64_t expected_z3[] =
4881 // pg: 0 1 1 1 0 0
4882 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
4883 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4884
4885 // Sxth (D) destructive
4886 uint64_t expected_z4[] =
4887 // pg: 1 1 0
4888 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
4889 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4890
4891 // Sxtw (D)
4892 uint64_t expected_z5[] =
4893 // pg: 1 1 0
4894 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
4895 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4896
4897 // clang-format on
4898 }
4899}
4900
4901TEST_SVE(sve_uxt) {
4902 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4903 START();
4904
4905 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
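  // Uxtb, Uxth and Uxtw zero-extend the low 8, 16 or 32 bits of each active
  // lane. For example, the H lane 0x07f8 becomes 0x00f8 under Uxtb.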
4906
4907 // For simplicity, we re-use the same pg for various lane sizes.
4908 // For D lanes: 1, 1, 0
4909 // For S lanes: 1, 1, 1, 0, 0
4910 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4911 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4912 Initialise(&masm, p0.VnB(), pg_in);
4913 PRegisterM pg = p0.Merging();
4914
4915 // These are merging operations, so we have to initialise the result register.
4916 // We use a mixture of constructive and destructive operations.
4917
4918 InsrHelper(&masm, z31.VnD(), in);
4919 // Make a copy so we can check that constructive operations preserve zn.
4920 __ Mov(z30, z31);
4921
4922 // For constructive operations, use a different initial result value.
4923 __ Index(z29.VnB(), 0, -1);
4924
4925 __ Mov(z0, z29);
4926 __ Uxtb(z0.VnH(), pg, z31.VnH());
4927 __ Mov(z1, z31);
4928 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
4929 __ Mov(z2, z29);
4930 __ Uxtb(z2.VnD(), pg, z31.VnD());
4931 __ Mov(z3, z31);
4932 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
4933 __ Mov(z4, z29);
4934 __ Uxth(z4.VnD(), pg, z31.VnD());
4935 __ Mov(z5, z31);
4936 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
4937
4938 END();
4939
4940 if (CAN_RUN()) {
4941 RUN();
4942 // clang-format off
4943
4944 // Uxtb (H)
4945 uint64_t expected_z0[] =
4946 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4947 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
4948 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4949
4950 // Uxtb (S) destructive
4951 uint64_t expected_z1[] =
4952 // pg: 0 1 1 1 0 0
4953 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
4954 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4955
4956 // Uxtb (D)
4957 uint64_t expected_z2[] =
4958 // pg: 1 1 0
4959 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
4960 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4961
4962 // Uxth (S) destructive
4963 uint64_t expected_z3[] =
4964 // pg: 0 1 1 1 0 0
4965 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
4966 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4967
4968 // Uxth (D)
4969 uint64_t expected_z4[] =
4970 // pg: 1 1 0
4971 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
4972 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4973
4974 // Uxtw (D) destructive
4975 uint64_t expected_z5[] =
4976 // pg: 1 1 0
4977 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
4978 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4979
4980 // clang-format on
4981 }
4982}
4983
4984TEST_SVE(sve_abs_neg) {
4985 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4986 START();
4987
4988 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
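  // Abs and Neg treat each lane as a signed two's-complement value. For
  // example, the D lane 0xfefcf8f0e1c3870f is negative, so both Abs and Neg
  // of that lane (when active) produce 0x0103070f1e3c78f1.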
4989
4990 // For simplicity, we re-use the same pg for various lane sizes.
4991 // For D lanes: 1, 1, 0
4992 // For S lanes: 1, 1, 1, 0, 0
4993 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4994 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4995 Initialise(&masm, p0.VnB(), pg_in);
4996 PRegisterM pg = p0.Merging();
4997
4999
5000 // These are merging operations, so we have to initialise the result register.
5001 // We use a mixture of constructive and destructive operations.
5002
5003 InsrHelper(&masm, z31.VnD(), in);
5004 // Make a copy so we can check that constructive operations preserve zn.
5005 __ Mov(z30, z31);
5006
5007 // For constructive operations, use a different initial result value.
5008 __ Index(z29.VnB(), 0, -1);
5009
5010 __ Mov(z0, z31);
5011 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5012 __ Mov(z1, z29);
5013 __ Abs(z1.VnB(), pg, z31.VnB());
5014
5015 __ Mov(z2, z31);
5016 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5017 __ Mov(z3, z29);
5018 __ Neg(z3.VnS(), pg, z31.VnS());
5019
5020 END();
5021
5022 if (CAN_RUN()) {
5023 RUN();
5024 // clang-format off
5025
5026 // Abs (D) destructive
5027 uint64_t expected_z0[] =
5028 // pg: 1 1 0
5029 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5030 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5031
5032 // Abs (B)
5033 uint64_t expected_z1[] =
5034 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5035 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5036 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5037
5038 // Neg (H) destructive
5039 uint64_t expected_z2[] =
5040 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5041 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5042 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5043
5044 // Neg (S)
5045 uint64_t expected_z3[] =
5046 // pg: 0 1 1 1 0 0
5047 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5048 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5049
5050 // clang-format on
5051 }
5052}
5053
5054TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
5055 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5056 START();
5057
5058 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
5059
5060 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
5061
5062 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
5063
5064 int index_s[] = {1, 3, 2, 31, -1};
5065
5066 int index_d[] = {31, 1};
5067
5068 // Initialise the register with a value that doesn't exist in the table.
5069 __ Dup(z9.VnB(), 0x1f);
5070 InsrHelper(&masm, z9.VnD(), table_inputs);
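  // After the Insr sequence, only the two lowest D lanes hold table_inputs;
  // every higher lane still holds the 0x1f fill value. Tbl then returns the
  // selected lane's value for in-range indices, and zero for any index that
  // is greater than or equal to the number of lanes at the current VL.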
5071
5072 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
5073 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
5074 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
5075 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
5076
5077 InsrHelper(&masm, ind_b, index_b);
5078 InsrHelper(&masm, ind_h, index_h);
5079 InsrHelper(&masm, ind_s, index_s);
5080 InsrHelper(&masm, ind_d, index_d);
5081
5082 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
5083
5084 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
5085
5086 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
5087
5088 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
5089
5090 END();
5091
5092 if (CAN_RUN()) {
5093 RUN();
5094
5095 // clang-format off
5096 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
5097 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
5098
5099 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
5100 0x5544, 0x7766, 0xddcc, 0x9988};
5101
5102 unsigned z28_expected[] =
5103 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
5104
5105 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
5106 // clang-format on
5107
5108 unsigned vl = config->sve_vl_in_bits();
5109 for (size_t i = 0; i < ArrayLength(index_b); i++) {
5110 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
5111 if (!core.HasSVELane(z26.VnB(), lane)) break;
5112 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
5113 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
5114 }
5115
5116 for (size_t i = 0; i < ArrayLength(index_h); i++) {
5117 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
5118 if (!core.HasSVELane(z27.VnH(), lane)) break;
5119 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
5120 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
5121 }
5122
5123 for (size_t i = 0; i < ArrayLength(index_s); i++) {
5124 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
5125 if (!core.HasSVELane(z28.VnS(), lane)) break;
5126 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
5127 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
5128 }
5129
5130 for (size_t i = 0; i < ArrayLength(index_d); i++) {
5131 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
5132 if (!core.HasSVELane(z29.VnD(), lane)) break;
5133 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
5134 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
5135 }
5136 }
5137}
5138
5139TEST_SVE(ldr_str_z_bi) {
5140 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5141 START();
5142
5143 int vl = config->sve_vl_in_bytes();
5144
5145 // The immediate can address [-256, 255] times the VL, so allocate enough
5146 // space to exceed that in both directions.
5147 int data_size = vl * 1024;
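  // With SVE_MUL_VL, the immediate is scaled by the vector length in bytes.
  // For example, with a 384-bit VL (vl = 48), `SVEMemOperand(x0, 2,
  // SVE_MUL_VL)` addresses x0 + 96 and `SVEMemOperand(x0, -256, SVE_MUL_VL)`
  // addresses x0 - 12288, both of which fall inside this buffer.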
5148
5149 uint8_t* data = new uint8_t[data_size];
5150 memset(data, 0, data_size);
5151
5152 // Set the base half-way through the buffer so we can use negative indices.
5153 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5154
5155 __ Index(z1.VnB(), 1, 3);
5156 __ Index(z2.VnB(), 2, 5);
5157 __ Index(z3.VnB(), 3, 7);
5158 __ Index(z4.VnB(), 4, 11);
5159 __ Index(z5.VnB(), 5, 13);
5160 __ Index(z6.VnB(), 6, 2);
5161 __ Index(z7.VnB(), 7, 3);
5162 __ Index(z8.VnB(), 8, 5);
5163 __ Index(z9.VnB(), 9, 7);
5164
5165 // Encodable cases.
5166 __ Str(z1, SVEMemOperand(x0));
5167 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
5168 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
5169 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
5170 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
5171
5172 // Cases that fall back on `Adr`.
5173 __ Str(z6, SVEMemOperand(x0, 6 * vl));
5174 __ Str(z7, SVEMemOperand(x0, -7 * vl));
5175 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
5176 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
5177
5178 // Corresponding loads.
5179 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
5180 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
5181 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
5182 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
5183 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
5184
5185 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
5186 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
5187 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
5188 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
5189
5190 END();
5191
5192 if (CAN_RUN()) {
5193 RUN();
5194
5195 uint8_t* expected = new uint8_t[data_size];
5196 memset(expected, 0, data_size);
5197 uint8_t* middle = &expected[data_size / 2];
5198
5199 for (int i = 0; i < vl; i++) {
5200 middle[i] = (1 + (3 * i)) & 0xff; // z1
5201 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
5202 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
5203 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
5204 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
5205 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
5206 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
5207 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
5208 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
5209 }
5210
5211 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5212
5213 ASSERT_EQUAL_SVE(z1, z11);
5214 ASSERT_EQUAL_SVE(z2, z12);
5215 ASSERT_EQUAL_SVE(z3, z13);
5216 ASSERT_EQUAL_SVE(z4, z14);
5217 ASSERT_EQUAL_SVE(z5, z15);
5218 ASSERT_EQUAL_SVE(z6, z16);
5219 ASSERT_EQUAL_SVE(z7, z17);
5220 ASSERT_EQUAL_SVE(z8, z18);
5221 ASSERT_EQUAL_SVE(z9, z19);
5222
5223 delete[] expected;
5224 }
5225 delete[] data;
5226}
5227
5228TEST_SVE(ldr_str_p_bi) {
5229 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5230 START();
5231
5232 int vl = config->sve_vl_in_bytes();
5233 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5234 int pl = vl / kZRegBitsPerPRegBit;
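  // Each predicate bit governs one Z-register byte, so a P register is an
  // eighth of the size of a Z register; with a 2048-bit VL, for example,
  // vl is 256 bytes and pl is 32 bytes.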
5235
5236 // The immediate can address [-256, 255] times the PL, so allocate enough
5237 // space to exceed that in both directions.
5238 int data_size = pl * 1024;
5239
5240 uint8_t* data = new uint8_t[data_size];
5241 memset(data, 0, data_size);
5242
5243 // Set the base half-way through the buffer so we can use negative indices.
5244 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5245
5246 uint64_t pattern[4] = {0x1010101011101111,
5247 0x0010111011000101,
5248 0x1001101110010110,
5249 0x1010110101100011};
5250 for (int i = 8; i <= 15; i++) {
5251 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
5252 Initialise(&masm,
5253 PRegister(i),
5254 pattern[3] * i,
5255 pattern[2] * i,
5256 pattern[1] * i,
5257 pattern[0] * i);
5258 }
5259
5260 // Encodable cases.
5261 __ Str(p8, SVEMemOperand(x0));
5262 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
5263 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
5264 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
5265
5266 // Cases that fall back on `Adr`.
5267 __ Str(p12, SVEMemOperand(x0, 6 * pl));
5268 __ Str(p13, SVEMemOperand(x0, -7 * pl));
5269 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
5270 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
5271
5272 // Corresponding loads.
5273 __ Ldr(p0, SVEMemOperand(x0));
5274 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
5275 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
5276 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
5277
5278 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
5279 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
5280 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
5281 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
5282
5283 END();
5284
5285 if (CAN_RUN()) {
5286 RUN();
5287
5288 uint8_t* expected = new uint8_t[data_size];
5289 memset(expected, 0, data_size);
5290 uint8_t* middle = &expected[data_size / 2];
5291
5292 for (int i = 0; i < pl; i++) {
5293 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
5294 size_t index = i / sizeof(pattern[0]);
5295 VIXL_ASSERT(index < ArrayLength(pattern));
5296 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
5297 // Each byte of `pattern` can be multiplied by 15 without carry.
5298 VIXL_ASSERT((byte * 15) <= 0xff);
5299
5300 middle[i] = byte * 8; // p8
5301 middle[(2 * pl) + i] = byte * 9; // p9
5302 middle[(-3 * pl) + i] = byte * 10; // p10
5303 middle[(255 * pl) + i] = byte * 11; // p11
5304 middle[(6 * pl) + i] = byte * 12; // p12
5305 middle[(-7 * pl) + i] = byte * 13; // p13
5306 middle[(314 * pl) + i] = byte * 14; // p14
5307 middle[(-314 * pl) + i] = byte * 15; // p15
5308 }
5309
5310 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5311
5312 ASSERT_EQUAL_SVE(p0, p8);
5313 ASSERT_EQUAL_SVE(p1, p9);
5314 ASSERT_EQUAL_SVE(p2, p10);
5315 ASSERT_EQUAL_SVE(p3, p11);
5316 ASSERT_EQUAL_SVE(p4, p12);
5317 ASSERT_EQUAL_SVE(p5, p13);
5318 ASSERT_EQUAL_SVE(p6, p14);
5319 ASSERT_EQUAL_SVE(p7, p15);
5320
5321 delete[] expected;
5322 }
5323 delete[] data;
5324}
5325
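// Write `data` to `base + offset + (index * sizeof(data))`, using memcpy so
// that unaligned lane-sized stores are well defined. The tests below use this
// to build the expected memory image lane by lane.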
5326template <typename T>
5327static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
5328 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
5329}
5330
5331TEST_SVE(sve_ld1_st1_contiguous) {
5332 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5333 START();
5334
5335 int vl = config->sve_vl_in_bytes();
5336
5337 // The immediate can address [-8, 7] times the VL, so allocate enough space to
5338 // exceed that in both directions.
5339 int data_size = vl * 128;
5340
5341 uint8_t* data = new uint8_t[data_size];
5342 memset(data, 0, data_size);
5343
5344 // Set the base half-way through the buffer so we can use negative indices.
5345 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5346
5347 // Encodable scalar-plus-immediate cases.
5348 __ Index(z1.VnB(), 1, -3);
5349 __ Ptrue(p1.VnB());
5350 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
5351
5352 __ Index(z2.VnH(), -2, 5);
5353 __ Ptrue(p2.VnH(), SVE_MUL3);
5354 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
5355
5356 __ Index(z3.VnS(), 3, -7);
5357 __ Ptrue(p3.VnS(), SVE_POW2);
5358 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
5359
5360 // Encodable scalar-plus-scalar cases.
5361 __ Index(z4.VnD(), -4, 11);
5362 __ Ptrue(p4.VnD(), SVE_VL3);
5363 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
5364 __ Mov(x2, 17);
5365 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
5366
5367 __ Index(z5.VnD(), 6, -2);
5368 __ Ptrue(p5.VnD(), SVE_VL16);
5369 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
5370 __ Mov(x4, 6);
5371 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
5372
5373 // Unencodable cases fall back on `Adr`.
5374 __ Index(z6.VnS(), -7, 3);
5375 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
5376 // predicate bits when handling larger lanes.
5377 __ Ptrue(p6.VnB(), SVE_ALL);
5378 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
5379
5380 __ Index(z7.VnD(), 32, -11);
5381 __ Ptrue(p7.VnD(), SVE_MUL4);
5382 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
5383
5384 // Corresponding loads.
5385 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
5386 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5387 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5388 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5389 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
5390 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
5391
5392 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5393 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5394 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5395 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
5396
5397 // We can test ld1 by comparing the value loaded with the value stored. In
5398 // most cases, there are two complications:
5399 // - Loads have zeroing predication, so we have to clear the inactive
5400 // elements on our reference.
5401 // - We have to replicate any sign- or zero-extension.
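  // For example, Ld1sb(z16.VnD(), ...) loads one byte per D lane and
  // sign-extends it to 64 bits, so its reference value is built by applying
  // Sxtb to a zeroed register under the same predicate.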
5402
5403 // Ld1b(z8.VnB(), ...)
5404 __ Dup(z18.VnB(), 0);
5405 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
5406
5407 // Ld1b(z9.VnH(), ...)
5408 __ Dup(z19.VnH(), 0);
5409 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
5410
5411 // Ld1h(z10.VnS(), ...)
5412 __ Dup(z20.VnS(), 0);
5413 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
5414
5415 // Ld1b(z11.VnD(), ...)
5416 __ Dup(z21.VnD(), 0);
5417 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
5418
5419 // Ld1d(z12.VnD(), ...)
5420 __ Dup(z22.VnD(), 0);
5421 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
5422
5423 // Ld1w(z13.VnS(), ...)
5424 __ Dup(z23.VnS(), 0);
5425 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
5426
5427 // Ld1sb(z14.VnH(), ...)
5428 __ Dup(z24.VnH(), 0);
5429 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
5430
5431 // Ld1sh(z15.VnS(), ...)
5432 __ Dup(z25.VnS(), 0);
5433 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
5434
5435 // Ld1sb(z16.VnD(), ...)
5436 __ Dup(z26.VnD(), 0);
5437 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
5438
5439 // Ld1sw(z17.VnD(), ...)
5440 __ Dup(z27.VnD(), 0);
5441 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
5442
5443 END();
5444
5445 if (CAN_RUN()) {
5446 RUN();
5447
5448 uint8_t* expected = new uint8_t[data_size];
5449 memset(expected, 0, data_size);
5450 uint8_t* middle = &expected[data_size / 2];
5451
5452 int vl_b = vl / kBRegSizeInBytes;
5453 int vl_h = vl / kHRegSizeInBytes;
5454 int vl_s = vl / kSRegSizeInBytes;
5455 int vl_d = vl / kDRegSizeInBytes;
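    // These mirror the Ptrue constraint patterns used above: SVE_MUL3/SVE_MUL4
    // round the lane count down to a multiple of three or four, SVE_POW2
    // rounds it down to a power of two, and SVE_VLn activates exactly n lanes
    // (or none if the vector is too short), hence the `vl_d >= 3` and
    // `vl_d >= 16` checks below.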
5456
5457 // Encodable cases.
5458
5459 // st1b { z1.b }, SVE_ALL
5460 for (int i = 0; i < vl_b; i++) {
5461 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
5462 }
5463
5464 // st1b { z2.h }, SVE_MUL3
5465 int vl_h_mul3 = vl_h - (vl_h % 3);
5466 for (int i = 0; i < vl_h_mul3; i++) {
5467 MemoryWrite(middle, 7 * vl, i, static_cast<uint8_t>(-2 + (5 * i)));
5468 }
5469
5470 // st1h { z3.s }, SVE_POW2
5471 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
5472 for (int i = 0; i < vl_s_pow2; i++) {
5473 MemoryWrite(middle, -8 * vl, i, static_cast<uint16_t>(3 - (7 * i)));
5474 }
5475
5476 // st1b { z4.d }, SVE_VL3
5477 if (vl_d >= 3) {
5478 for (int i = 0; i < 3; i++) {
5479 MemoryWrite(middle,
5480 (8 * vl) + 17,
5481 i,
5482 static_cast<uint8_t>(-4 + (11 * i)));
5483 }
5484 }
5485
5486 // st1d { z5.d }, SVE_VL16
5487 if (vl_d >= 16) {
5488 for (int i = 0; i < 16; i++) {
5489 MemoryWrite(middle,
5490 (10 * vl) + (6 * kDRegSizeInBytes),
5491 i,
5492 static_cast<uint64_t>(6 - (2 * i)));
5493 }
5494 }
5495
5496 // Unencodable cases.
5497
5498 // st1w { z6.s }, SVE_ALL
5499 for (int i = 0; i < vl_s; i++) {
5500 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
5501 }
5502
5503 // st1w { z7.d }, SVE_MUL4
5504 int vl_d_mul4 = vl_d - (vl_d % 4);
5505 for (int i = 0; i < vl_d_mul4; i++) {
5506 MemoryWrite(middle, 22 * vl, i, static_cast<uint32_t>(32 + (-11 * i)));
5507 }
5508
5509 ASSERT_EQUAL_MEMORY(expected, data, data_size);
5510
5511 // Check that we loaded back the expected values.
5512
5513 ASSERT_EQUAL_SVE(z18, z8);
5514 ASSERT_EQUAL_SVE(z19, z9);
5515 ASSERT_EQUAL_SVE(z20, z10);
5516 ASSERT_EQUAL_SVE(z21, z11);
5517 ASSERT_EQUAL_SVE(z22, z12);
5518 ASSERT_EQUAL_SVE(z23, z13);
5519 ASSERT_EQUAL_SVE(z24, z14);
5520 ASSERT_EQUAL_SVE(z25, z15);
5521 ASSERT_EQUAL_SVE(z26, z16);
5522 ASSERT_EQUAL_SVE(z27, z17);
5523
5524 delete[] expected;
5525 }
5526 delete[] data;
5527}
5528
5529} // namespace aarch64
5530} // namespace vixl