// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
  // We never free this memory, but we need it to live for as long as the
  // static linked list of tests, and this is the easiest way to do it.
  Test* test = new Test(name, fn);
  test->set_sve_vl_in_bits(vl);
  return test;
}

// The TEST_SVE macro works just like the usual TEST macro, but the resulting
// function receives a `Test* config` argument, to allow it to query the
// vector length.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// On the Simulator, run SVE tests with several vector lengths, including the
// extreme values and an intermediate value that isn't a power of two.

#define TEST_SVE(name)                                                  \
  void Test##name(Test* config);                                        \
  Test* test_##name##_list[] =                                          \
      {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name),    \
       MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name),    \
       MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  simulator.SetVectorLengthInBits(config->sve_vl_in_bits())

#else
// Otherwise, just use whatever the hardware provides.
static const int kSVEVectorLengthInBits =
    CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
        ? CPU::ReadSVEVectorLengthInBits()
        : 0;

#define TEST_SVE(name)                                                      \
  void Test##name(Test* config);                                            \
  Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits,          \
                                           "AARCH64_ASM_" #name "_vlauto",  \
                                           &Test##name);                    \
  void Test##name(Test* config)

#define SVE_SETUP_WITH_FEATURES(...) \
  SETUP_WITH_FEATURES(__VA_ARGS__);  \
  USE(config)

#endif
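// A minimal usage sketch of the machinery above (illustrative only;
// `sve_example` is a hypothetical test name, not one of the tests in this
// file): the TEST_SVE body receives `config`, and SVE_SETUP_WITH_FEATURES
// uses it to apply the configured vector length when running on the
// Simulator.
//
//   TEST_SVE(sve_example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     START();
//     __ Ptrue(p0.VnB());  // Code generated under the configured VL.
//     END();
//     if (CAN_RUN()) {
//       RUN();
//     }
//   }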

// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it
// doesn't exist in the MacroAssembler itself.
//
// Usage:
//
//    int values[] = { 42, 43, 44 };
//    InsrHelper(&masm, z0.VnS(), values);  // Sets z0.S = { ..., 42, 43, 44 }
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane.
template <typename T, size_t N>
void InsrHelper(MacroAssembler* masm,
                const ZRegister& zdn,
                const T (&values)[N]) {
  for (size_t i = 0; i < N; i++) {
    masm->Insr(zdn, values[i]);
  }
}

// Conveniently initialise P registers with scalar bit patterns. The
// destination lane size is ignored. This is optimised for call-site clarity,
// not generated code quality.
//
// Usage:
//
//    Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}
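// A note on the multi-value forms above (a sketch of the layout, not taken
// from the original comments): the literal pool is emitted with `value0` at
// the lowest address, so with the maximum 2048-bit vector length (32-byte P
// registers) `Initialise(&masm, p0, v3, v2, v1, v0)` loads `v0` into
// predicate bits [63:0], `v1` into bits [127:64], and so on. At smaller
// vector lengths the load only consumes the low-order part of the literal.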

// Conveniently initialise P registers by lane. This is optimised for call-site
// clarity, not generated code quality.
//
// Usage:
//
//    int values[] = { 0x0, 0x1, 0x2 };
//    Initialise(&masm, p0.VnS(), values);  // Sets p0 = 0b'0000'0001'0010
//
// The rightmost (highest-indexed) array element maps to the lowest-numbered
// lane. Unspecified lanes are set to 0 (inactive).
//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lowest bit, and writes zero to the upper
// bits, but other (encodable) values can be specified if required by the test.
template <typename T, size_t N>
void Initialise(MacroAssembler* masm,
                const PRegisterWithLaneSize& pd,
                const T (&values)[N]) {
  // Turn the array into 64-bit chunks.
  uint64_t chunks[4] = {0, 0, 0, 0};
  VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);

  int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT((64 % p_bits_per_lane) == 0);
  VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);

  uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);

  VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
  size_t bit = 0;
  for (int n = static_cast<int>(N - 1); n >= 0; n--) {
    VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
    uint64_t value = values[n] & p_lane_mask;
    chunks[bit / 64] |= value << (bit % 64);
    bit += p_bits_per_lane;
  }

  Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
}
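// Worked example (a sketch of the packing above, not a statement from the
// original comments): with S-sized lanes, each lane owns
// 32 / kZRegBitsPerPRegBit = 4 predicate bits, so
//
//    int values[] = {0xc, 0x7, 0x9, 0x6, 0xf};
//    Initialise(&masm, p5.VnS(), values);
//
// packs the values, rightmost element into the lowest bits, as
// chunks[0] = 0x00000000000c796f, with chunks[1..3] left as zero.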

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  __ Mov(x0, 0x0123456789abcdef);

  // Test basic `Insr` behaviour.
  __ Insr(z0.VnB(), 1);
  __ Insr(z0.VnB(), 2);
  __ Insr(z0.VnB(), x0);
  __ Insr(z0.VnB(), -42);
  __ Insr(z0.VnB(), 0);

  // Test array inputs.
  int z1_inputs[] = {3, 4, 5, -42, 0};
  InsrHelper(&masm, z1.VnH(), z1_inputs);

  // Test that sign-extension works as intended for various lane sizes.
  __ Dup(z2.VnD(), 0);            // Clear the register first.
  __ Insr(z2.VnB(), -42);         // 0xd6
  __ Insr(z2.VnB(), 0xfe);        // 0xfe
  __ Insr(z2.VnH(), -42);         // 0xffd6
  __ Insr(z2.VnH(), 0xfedc);      // 0xfedc
  __ Insr(z2.VnS(), -42);         // 0xffffffd6
  __ Insr(z2.VnS(), 0xfedcba98);  // 0xfedcba98
  // Use another register for VnD(), so we can support 128-bit Z registers.
  __ Insr(z3.VnD(), -42);                 // 0xffffffffffffffd6
  __ Insr(z3.VnD(), 0xfedcba9876543210);  // 0xfedcba9876543210

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);

  int p3_inputs[] = {0, 1};
  Initialise(&masm, p3.VnD(), p3_inputs);

  // Advanced cases: move numeric value into architecturally-ignored bits.

  // B-sized lanes get one bit in a P register, so there are no ignored bits.

  // H-sized lanes get two bits in a P register.
  int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
  Initialise(&masm, p4.VnH(), p4_inputs);

  // S-sized lanes get four bits in a P register.
  int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
  Initialise(&masm, p5.VnS(), p5_inputs);

  // D-sized lanes get eight bits in a P register.
  int p6_inputs[] = {0x81, 0xcc, 0x55};
  Initialise(&masm, p6.VnD(), p6_inputs);

  // The largest possible P register has 32 bytes.
  int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
                     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
  Initialise(&masm, p7.VnD(), p7_inputs);

  END();

  if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
    }

    // Test that array checks work properly on predicates initialised with a
    // possibly-different lane size.
    // 0b...11'10'01'00'01'10'11
    int p4_expected[] = {0x39, 0x1b};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnD());

    ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());

    // 0b...10000001'11001100'01010101
    int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnH());

    // 0b...10011100'10011101'10011110'10011111
    int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
                         1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
}

// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();

  // The Simulator has two mechanisms for writing V registers:
  //  - Write*Register, calling through to SimRegisterBase::Write.
  //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
  // Try to cover both variants.

  // Prepare some known inputs.
  uint8_t data[kQRegSizeInBytes];
  for (size_t i = 0; i < kQRegSizeInBytes; i++) {
    data[i] = 42 + i;
  }
  __ Mov(x10, reinterpret_cast<uintptr_t>(data));
  __ Fmov(d30, 42.0);

  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
  __ Index(z0.VnB(), 0, 1);
  __ Index(z1.VnB(), 0, 1);
  __ Index(z2.VnB(), 0, 1);
  __ Index(z3.VnB(), 0, 1);
  __ Index(z4.VnB(), 0, 1);

  __ Index(z10.VnB(), 0, -1);
  __ Index(z11.VnB(), 0, -1);
  __ Index(z12.VnB(), 0, -1);
  __ Index(z13.VnB(), 0, -1);
  __ Index(z14.VnB(), 0, -1);

  // Instructions using Write*Register (and SimRegisterBase::Write).
  __ Ldr(b0, MemOperand(x10));
  __ Fcvt(h1, d30);
  __ Fmov(s2, 1.5f);
  __ Fmov(d3, d30);
  __ Ldr(q4, MemOperand(x10));

  // Instructions using LogicVRegister::ClearForWrite.
  // These also (incidentally) test that across-lane instructions correctly
  // ignore the high-order Z register lanes.
  __ Sminv(b10, v10.V16B());
  __ Addv(h11, v11.V4H());
  __ Saddlv(s12, v12.V8H());
  __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
  __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Check the Q part first.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1);  // 42.0 (f16)
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2);  // 1.5 (f32)
    ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3);  // 42.0 (f64)
    ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10);  // -15
    // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
    // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13);  // [-8] x 8
    // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
    //   -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
    ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);

    // Check that the upper lanes are all clear.
    for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
      ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
      ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
    }
  }
}

static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
  int zn_inputs[] = {-5, -20, 9, 8};
  int zm_inputs[] = {9, -5, 4, 5};

  ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
  ZRegister za = z1.WithLaneSize(lane_size_in_bits);
  ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
  ZRegister zm = z3.WithLaneSize(lane_size_in_bits);

  // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
  InsrHelper(&masm, zn, zn_inputs);
  InsrHelper(&masm, zm, zm_inputs);

  int p0_inputs[] = {1, 1, 0, 1};
  int p1_inputs[] = {1, 0, 1, 1};
  int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
  Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
  Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
  Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);

  // The Mla macro automatically selects between mla, mad and movprfx + mla
  // based on what registers are aliased.
  ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
  ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
  __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);

  __ Mov(mla_dn_result, zn);
  __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);

  __ Mov(mla_dm_result, zm);
  __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);

  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
  // based on what registers are aliased.
  ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
  ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
  __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);

  __ Mov(mls_dn_result, zn);
  __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);

  __ Mov(mls_dm_result, zm);
  __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);

  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
    ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));

    int mla[] = {-84, 101, 33, 42};
    int mls[] = {6, -99, -39, -38};

    int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);

    int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);

    int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
    ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);

    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);

    int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);

    int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
    ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);

    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}

TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  InsrHelper(&masm, z8.VnD(), z8_inputs);
  uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
  InsrHelper(&masm, z15.VnD(), z15_inputs);

  __ And(z1.VnD(), z8.VnD(), z15.VnD());
  __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
  __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
  __ Orr(z4.VnD(), z8.VnD(), z15.VnD());

  END();

  if (CAN_RUN()) {
    RUN();
    uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
    uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
    uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
    uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
  }
}

TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // 0b...01011010'10110111
  int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1};  // Pm
  // 0b...11011001'01010010
  int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0};  // Pn
  // 0b...01010101'10110010
  int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};  // pg

  Initialise(&masm, p10.VnB(), p10_inputs);
  Initialise(&masm, p11.VnB(), p11_inputs);
  Initialise(&masm, p12.VnB(), p12_inputs);

  __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x0, NZCV);
  __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Mrs(x1, NZCV);
  __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
  __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0b...01010000'00010010
    int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...00000001'00000000
    int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...00000001'10100000
    int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000101'10100000
    int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
    // 0b...00000100'00000000
    int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    // 0b...01010101'00010010
    int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
    // 0b...01010001'10110010
    int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
    // 0b...01011011'00010111
    int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};

    ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
    ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
    ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
    ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
    ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p7.VnB());

    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
  int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
  int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z10.VnB(), z10_inputs);
  InsrHelper(&masm, z11.VnB(), z11_inputs);
  Initialise(&masm, p0.VnB(), p0_inputs);

  __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
  __ Mrs(x6, NZCV);

  uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
  uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
  int p1_inputs[] = {1, 1};
  InsrHelper(&masm, z12.VnD(), z12_inputs);
  InsrHelper(&masm, z13.VnD(), z13_inputs);
  Initialise(&masm, p1.VnD(), p1_inputs);

  __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
  __ Mrs(x7, NZCV);

  int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
  int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};

  int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z14.VnH(), z14_inputs);
  InsrHelper(&masm, z15.VnH(), z15_inputs);
  Initialise(&masm, p2.VnH(), p2_inputs);

  __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x8, NZCV);

  __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
  __ Mrs(x9, NZCV);

  int z16_inputs[] = {0, -1, 0, 0};
  int z17_inputs[] = {0, 0, 2147483647, -2147483648};
  int p3_inputs[] = {1, 1, 1, 1};
  InsrHelper(&masm, z16.VnS(), z16_inputs);
  InsrHelper(&masm, z17.VnS(), z17_inputs);
  Initialise(&masm, p3.VnS(), p3_inputs);

  __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x10, NZCV);

  __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
  __ Mrs(x11, NZCV);

  // Architectural aliases testing.
  __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB());  // HS
  __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD());  // HI
  __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH());  // GE
  __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS());  // GT

  END();

  if (CAN_RUN()) {
    RUN();

    int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
    for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
      int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
    }

    int p7_expected[] = {1, 0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnD());

    int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnH());

    int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnH());

    int p10_expected[] = {0, 0, 0, 1};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0, 1, 1, 1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    // Reuse the expected results to verify the architectural aliases.
    ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
    ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
    ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
    ASSERT_EQUAL_SVE(p10_expected, p15.VnS());

    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(NoFlag, w7);
    ASSERT_EQUAL_32(NoFlag, w8);
    ASSERT_EQUAL_32(NoFlag, w9);
    ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
  }
}

TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
  int src2_inputs_1[] = {0, -1};
  int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_1);
  InsrHelper(&masm, z19.VnD(), src2_inputs_1);
  Initialise(&masm, p0.VnB(), mask_inputs_1);

  __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x2, NZCV);
  __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x3, NZCV);

  int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
  int src2_inputs_2[] = {0, -32767};
  int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
  InsrHelper(&masm, z13.VnH(), src1_inputs_2);
  InsrHelper(&masm, z19.VnD(), src2_inputs_2);
  Initialise(&masm, p0.VnH(), mask_inputs_2);

  __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x4, NZCV);
  __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
  __ Mrs(x5, NZCV);

  int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
  int src2_inputs_3[] = {0, -2147483648};
  int mask_inputs_3[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_3);
  InsrHelper(&masm, z19.VnD(), src2_inputs_3);
  Initialise(&masm, p0.VnS(), mask_inputs_3);

  __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x6, NZCV);
  __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x7, NZCV);

  int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
  int src2_inputs_4[] = {0x00, 0x7f};
  int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
  InsrHelper(&masm, z13.VnB(), src1_inputs_4);
  InsrHelper(&masm, z19.VnD(), src2_inputs_4);
  Initialise(&masm, p0.VnB(), mask_inputs_4);

  __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x8, NZCV);
  __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
  __ Mrs(x9, NZCV);

  int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
  int src2_inputs_5[] = {0x8000, 0xffff};
  int mask_inputs_5[] = {1, 1, 1, 1};
  InsrHelper(&masm, z13.VnS(), src1_inputs_5);
  InsrHelper(&masm, z19.VnD(), src2_inputs_5);
  Initialise(&masm, p0.VnS(), mask_inputs_5);

  __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x10, NZCV);
  __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
  __ Mrs(x11, NZCV);

  END();

  if (CAN_RUN()) {
    RUN();
    int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p2_expected, p2.VnB());

    int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
    ASSERT_EQUAL_SVE(p3_expected, p3.VnB());

    int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p4_expected, p4.VnH());

    int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p5_expected, p5.VnH());

    int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p6_expected, p6.VnS());

    int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
    ASSERT_EQUAL_SVE(p7_expected, p7.VnS());

    int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p8_expected, p8.VnB());

    int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
    ASSERT_EQUAL_SVE(p9_expected, p9.VnB());

    int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
    ASSERT_EQUAL_SVE(p10_expected, p10.VnS());

    int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
    ASSERT_EQUAL_SVE(p11_expected, p11.VnS());

    ASSERT_EQUAL_32(NoFlag, w2);
    ASSERT_EQUAL_32(NoFlag, w3);
    ASSERT_EQUAL_32(NoFlag, w4);
    ASSERT_EQUAL_32(SVENotLastFlag, w5);
    ASSERT_EQUAL_32(SVEFirstFlag, w6);
    ASSERT_EQUAL_32(SVENotLastFlag, w7);
    ASSERT_EQUAL_32(SVEFirstFlag, w8);
    ASSERT_EQUAL_32(SVEFirstFlag, w9);
    ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
    ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // clang-format off
  uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
  uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
                           0x0123, 0x4567, 0x89ab, 0xcdef};
  uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
  // clang-format on

  InsrHelper(&masm, z1.VnD(), z21_inputs);
  InsrHelper(&masm, z2.VnS(), z22_inputs);
  InsrHelper(&masm, z3.VnH(), z23_inputs);
  InsrHelper(&masm, z4.VnB(), z24_inputs);

  __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
  __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
  __ And(z3.VnH(), z3.VnH(), 0x0ff0);
  __ And(z4.VnB(), z4.VnB(), 0x3f);

  InsrHelper(&masm, z5.VnD(), z21_inputs);
  InsrHelper(&masm, z6.VnS(), z22_inputs);
  InsrHelper(&masm, z7.VnH(), z23_inputs);
  InsrHelper(&masm, z8.VnB(), z24_inputs);

  __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
  __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
  __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
  __ Eor(z8.VnB(), z8.VnB(), 0x3f);

  InsrHelper(&masm, z9.VnD(), z21_inputs);
  InsrHelper(&masm, z10.VnS(), z22_inputs);
  InsrHelper(&masm, z11.VnH(), z23_inputs);
  InsrHelper(&masm, z12.VnB(), z24_inputs);

  __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
  __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
  __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
  __ Orr(z12.VnB(), z12.VnB(), 0x3f);

  {
    // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
    // so here we test `dupm` directly.
    ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
    __ dupm(z13.VnD(), 0x7ffffff800000000);
    __ dupm(z14.VnS(), 0x7ffc7ffc);
    __ dupm(z15.VnH(), 0x3ffc);
    __ dupm(z16.VnB(), 0xc3);
  }

  END();

  if (CAN_RUN()) {
    RUN();

    // clang-format off
    uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
    uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
    uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
                              0x0120, 0x0560, 0x09a0, 0x0de0};
    uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
                             0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};

    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z4.VnB());

    uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
    uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
    uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
                              0x0ed3, 0x4a97, 0x865b, 0xc21f};
    uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
                             0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};

    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
    ASSERT_EQUAL_SVE(z8_expected, z8.VnB());

    uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
    uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
    uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
                               0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
    uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
                              0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};

    ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
    ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
    ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
    ASSERT_EQUAL_SVE(z12_expected, z12.VnB());

    uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
    uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
    uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
                               0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc};
    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
    ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
    ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
    // clang-format on
  }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Encodable with `dup` (shift 0).
  __ Dup(z0.VnD(), -1);
  __ Dup(z1.VnS(), 0x7f);
  __ Dup(z2.VnH(), -0x80);
  __ Dup(z3.VnB(), 42);

  // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.

  // Encodable with `dupm` (but not `dup`).
  __ Dup(z10.VnD(), 0x3fc);
  __ Dup(z11.VnS(), -516097);  // 0xfff81fff, as a signed int.
  __ Dup(z12.VnH(), 0x0001);
  // All values that fit B-sized lanes are encodable with `dup`.

  // Cases that require immediate synthesis.
  __ Dup(z20.VnD(), 0x1234);
  __ Dup(z21.VnD(), -4242);
  __ Dup(z22.VnD(), 0xfedcba9876543210);
  __ Dup(z23.VnS(), 0x01020304);
  __ Dup(z24.VnS(), -0x01020304);
  __ Dup(z25.VnH(), 0x3c38);
  // All values that fit B-sized lanes are directly encodable.

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
    ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
    ASSERT_EQUAL_SVE(0xff80, z2.VnH());
    ASSERT_EQUAL_SVE(0x2a, z3.VnB());

    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
    ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
    ASSERT_EQUAL_SVE(0x0001, z12.VnH());

    ASSERT_EQUAL_SVE(0x1234, z20.VnD());
    ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
    ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
    ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
    ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
    ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
  }
}

TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, 0x123456780000002a);
  __ Decp(x0, p0.VnB());

  __ Mov(x1, 0x123456780000002a);
  __ Incp(x1, p0.VnH());

  // Check that saturation does not occur.
  __ Mov(x10, 1);
  __ Decp(x10, p0.VnS());

  __ Mov(x11, UINT64_MAX);
  __ Incp(x11, p0.VnD());

  __ Mov(x12, INT64_MAX);
  __ Incp(x12, p0.VnB());

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Decp(x20, p15.VnB());

  __ Mov(x21, 0x4000000000000000);
  __ Incp(x21, p15.VnH());

  END();
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
    ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);

    ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
    ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
    ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);

    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
  }
}

TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Sqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Sqincp(x1, p0.VnH());

  // 32-bit operations sign-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Sqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Sqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 1);
  __ Sqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high - 1);
  __ Sqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 0x8000000000000001);  // INT64_MIN + 1
  __ Sqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 0x80000001);  // INT32_MIN + 1
  __ Sqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 1);
  __ Sqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 1);
  __ Sqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Sqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Sqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0xffffffffffffffff);
  __ Sqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0xffffffff);
  __ Sqincp(x19, p0.VnD(), w19);

  __ Mov(x20, dummy_high + 0xffffffff);
  __ Sqdecp(x20, p0.VnB(), w20);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
  __ Sqdecp(x21, p15.VnB(), x21);

  __ Mov(x22, 0);
  __ Sqincp(x22, p15.VnH(), x22);

  __ Mov(x23, dummy_high);
  __ Sqdecp(x23, p15.VnS(), w23);

  __ Mov(x24, dummy_high);
  __ Sqincp(x24, p15.VnD(), w24);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations sign-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
    ASSERT_EQUAL_64(p0_d_count - 1, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(INT64_MIN, x10);
    ASSERT_EQUAL_64(INT32_MIN, x11);
    ASSERT_EQUAL_64(1 - p0_s_count, x12);
    ASSERT_EQUAL_64(1 - p0_d_count, x13);
    ASSERT_EQUAL_64(INT64_MAX, x14);
    ASSERT_EQUAL_64(INT32_MAX, x15);
    ASSERT_EQUAL_64(p0_s_count - 1, x18);
    ASSERT_EQUAL_64(p0_d_count - 1, x19);
    ASSERT_EQUAL_64(-1 - p0_b_count, x20);

    // Check all-true predicates.
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
    ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
    ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
  }
}

TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p0_b_count = 9;
  int p0_h_count = 5;
  int p0_s_count = 3;
  int p0_d_count = 2;

  uint64_t dummy_high = 0x1234567800000000;

  // 64-bit operations preserve their high bits.
  __ Mov(x0, dummy_high + 42);
  __ Uqdecp(x0, p0.VnB());

  __ Mov(x1, dummy_high + 42);
  __ Uqincp(x1, p0.VnH());

  // 32-bit operations zero-extend into their high bits.
  __ Mov(x2, dummy_high + 42);
  __ Uqdecp(x2, p0.VnS(), w2);

  __ Mov(x3, dummy_high + 42);
  __ Uqincp(x3, p0.VnD(), w3);

  __ Mov(x4, dummy_high + 0x80000001);
  __ Uqdecp(x4, p0.VnS(), w4);

  __ Mov(x5, dummy_high + 0x7fffffff);
  __ Uqincp(x5, p0.VnD(), w5);

  // Check that saturation behaves correctly.
  __ Mov(x10, 1);
  __ Uqdecp(x10, p0.VnB(), x10);

  __ Mov(x11, dummy_high + 1);
  __ Uqdecp(x11, p0.VnH(), w11);

  __ Mov(x12, 0x8000000000000000);  // INT64_MAX + 1
  __ Uqdecp(x12, p0.VnS(), x12);

  __ Mov(x13, dummy_high + 0x80000000);  // INT32_MAX + 1
  __ Uqdecp(x13, p0.VnD(), w13);

  __ Mov(x14, 0xfffffffffffffffe);  // UINT64_MAX - 1
  __ Uqincp(x14, p0.VnB(), x14);

  __ Mov(x15, dummy_high + 0xfffffffe);  // UINT32_MAX - 1
  __ Uqincp(x15, p0.VnH(), w15);

  // Don't use x16 and x17 since they are scratch registers by default.

  __ Mov(x18, 0x7ffffffffffffffe);  // INT64_MAX - 1
  __ Uqincp(x18, p0.VnS(), x18);

  __ Mov(x19, dummy_high + 0x7ffffffe);  // INT32_MAX - 1
  __ Uqincp(x19, p0.VnD(), w19);

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
  __ Uqdecp(x20, p15.VnB(), x20);

  __ Mov(x21, 0x4000000000000000);
  __ Uqincp(x21, p15.VnH(), x21);

  __ Mov(x22, dummy_high + 0x40000000);
  __ Uqdecp(x22, p15.VnS(), w22);

  __ Mov(x23, dummy_high + 0x40000000);
  __ Uqincp(x23, p15.VnD(), w23);

  END();
  if (CAN_RUN()) {
    RUN();

    // 64-bit operations preserve their high bits.
    ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
    ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);

    // 32-bit operations zero-extend into their high bits.
    ASSERT_EQUAL_64(42 - p0_s_count, x2);
    ASSERT_EQUAL_64(42 + p0_d_count, x3);
    ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
    ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);

    // Check that saturation behaves correctly.
    ASSERT_EQUAL_64(0, x10);
    ASSERT_EQUAL_64(0, x11);
    ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
    ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
    ASSERT_EQUAL_64(UINT64_MAX, x14);
    ASSERT_EQUAL_64(UINT32_MAX, x15);
    ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
    ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);

    // Check all-true predicates.
    ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
    ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
    ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
    ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
  }
}

TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
  int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
  Initialise(&masm, p0.VnB(), p0_inputs);

  // Check that saturation does not occur.

  int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
  InsrHelper(&masm, z0.VnD(), z0_inputs);

  int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
  InsrHelper(&masm, z1.VnD(), z1_inputs);

  int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
  InsrHelper(&masm, z2.VnS(), z2_inputs);

  int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
  InsrHelper(&masm, z3.VnH(), z3_inputs);

  // The MacroAssembler implements non-destructive operations using movprfx.
  __ Decp(z10.VnD(), p0, z0.VnD());
  __ Decp(z11.VnD(), p0, z1.VnD());
  __ Decp(z12.VnS(), p0, z2.VnS());
  __ Decp(z13.VnH(), p0, z3.VnH());

  __ Incp(z14.VnD(), p0, z0.VnD());
  __ Incp(z15.VnD(), p0, z1.VnD());
  __ Incp(z16.VnS(), p0, z2.VnS());
  __ Incp(z17.VnH(), p0, z3.VnH());

  // Also test destructive forms.
  __ Mov(z4, z0);
  __ Mov(z5, z1);
  __ Mov(z6, z2);
  __ Mov(z7, z3);

  __ Decp(z0.VnD(), p0);
  __ Decp(z1.VnD(), p0);
  __ Decp(z2.VnS(), p0);
  __ Decp(z3.VnH(), p0);

  __ Incp(z4.VnD(), p0);
  __ Incp(z5.VnD(), p0);
  __ Incp(z6.VnS(), p0);
  __ Incp(z7.VnH(), p0);

  END();
  if (CAN_RUN()) {
    RUN();

    // z0_inputs[...] - number of active D lanes (2)
    int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
    ASSERT_EQUAL_SVE(z0_expected, z0.VnD());

    // z1_inputs[...] - number of active D lanes (2)
    int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
    ASSERT_EQUAL_SVE(z1_expected, z1.VnD());

    // z2_inputs[...] - number of active S lanes (3)
    int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
    ASSERT_EQUAL_SVE(z2_expected, z2.VnS());

    // z3_inputs[...] - number of active H lanes (5)
    int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
    ASSERT_EQUAL_SVE(z3_expected, z3.VnH());

    // z0_inputs[...] + number of active D lanes (2)
    uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
    ASSERT_EQUAL_SVE(z4_expected, z4.VnD());

    // z1_inputs[...] + number of active D lanes (2)
    uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
    ASSERT_EQUAL_SVE(z5_expected, z5.VnD());

    // z2_inputs[...] + number of active S lanes (3)
    uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
    ASSERT_EQUAL_SVE(z6_expected, z6.VnS());

    // z3_inputs[...] + number of active H lanes (5)
    uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
    ASSERT_EQUAL_SVE(z7_expected, z7.VnH());

    // Check that the non-destructive macros produced the same results.
    ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
    ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
    ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
    ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
    ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
    ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
    ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
    ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
  }
}

TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
  __ Decp(z0.VnD(), p15);

  __ Dup(z1.VnS(), 0);
  __ Decp(z1.VnS(), p15);

  __ Dup(z2.VnH(), 0);
  __ Decp(z2.VnH(), p15);

  __ Dup(z3.VnD(), 0);
  __ Incp(z3.VnD(), p15);

  __ Dup(z4.VnS(), 0);
  __ Incp(z4.VnS(), p15);

  __ Dup(z5.VnH(), 0);
  __ Incp(z5.VnH(), p15);

  END();
  if (CAN_RUN()) {
    RUN();

    int d_lane_count = core.GetSVELaneCount(kDRegSize);
    int s_lane_count = core.GetSVELaneCount(kSRegSize);
    int h_lane_count = core.GetSVELaneCount(kHRegSize);

    for (int i = 0; i < d_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
      ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
    }

    for (int i = 0; i < s_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
      ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
    }

    for (int i = 0; i < h_lane_count; i++) {
      ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
      ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
    }
  }
}
1436
Jacob Bramleye8289202019-07-31 11:25:23 +01001437TEST_SVE(sve_sqinc_sqdec_p_vector) {
1438 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001439 START();
1440
1441 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1442 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1443 Initialise(&masm, p0.VnB(), p0_inputs);
1444
1445 // Check that saturation behaves correctly.
1446
1447 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1448 InsrHelper(&masm, z0.VnD(), z0_inputs);
1449
1450 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1451 InsrHelper(&masm, z1.VnD(), z1_inputs);
1452
1453 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1454 InsrHelper(&masm, z2.VnS(), z2_inputs);
1455
1456 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1457 InsrHelper(&masm, z3.VnH(), z3_inputs);
1458
1459 // The MacroAssembler implements non-destructive operations using movprfx.
1460 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1461 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1462 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1463 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1464
1465 __ Sqincp(z14.VnD(), p0, z0.VnD());
1466 __ Sqincp(z15.VnD(), p0, z1.VnD());
1467 __ Sqincp(z16.VnS(), p0, z2.VnS());
1468 __ Sqincp(z17.VnH(), p0, z3.VnH());
1469
1470 // Also test destructive forms.
1471 __ Mov(z4, z0);
1472 __ Mov(z5, z1);
1473 __ Mov(z6, z2);
1474 __ Mov(z7, z3);
1475
1476 __ Sqdecp(z0.VnD(), p0);
1477 __ Sqdecp(z1.VnD(), p0);
1478 __ Sqdecp(z2.VnS(), p0);
1479 __ Sqdecp(z3.VnH(), p0);
1480
1481 __ Sqincp(z4.VnD(), p0);
1482 __ Sqincp(z5.VnD(), p0);
1483 __ Sqincp(z6.VnS(), p0);
1484 __ Sqincp(z7.VnH(), p0);
1485
1486 END();
1487 if (CAN_RUN()) {
1488 RUN();
1489
1490 // z0_inputs[...] - number of active D lanes (2)
1491 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1492 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1493
1494 // z1_inputs[...] - number of active D lanes (2)
1495 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1496 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1497
1498 // z2_inputs[...] - number of active S lanes (3)
1499 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1500 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1501
1502 // z3_inputs[...] - number of active H lanes (5)
1503 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1504 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1505
1506 // z0_inputs[...] + number of active D lanes (2)
1507 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1508 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1509
1510 // z1_inputs[...] + number of active D lanes (2)
1511 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1512 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1513
1514 // z2_inputs[...] + number of active S lanes (3)
1515 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1516 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1517
1518 // z3_inputs[...] + number of active H lanes (5)
1519 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1520 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1521
1522 // Check that the non-destructive macros produced the same results.
1523 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1524 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1525 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1526 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1527 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1528 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1529 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1530 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1531 }
1532}
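// The expected values above encode signed saturating arithmetic: for example,
// INT64_MAX + 2 saturates to INT64_MAX, and INT64_MIN - 2 saturates to
// INT64_MIN. A minimal sketch of that rule for 64-bit lanes follows; it is
// purely illustrative (the helper name is ours) and is not used by the test.
inline int64_t SignedSaturatingAdd64(int64_t value, int64_t delta) {
  if ((delta > 0) && (value > (INT64_MAX - delta))) return INT64_MAX;
  if ((delta < 0) && (value < (INT64_MIN - delta))) return INT64_MIN;
  return value + delta;
}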
1533
1534TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1536 START();
1537
1538 // With an all-true predicate, these instructions increment or decrement by
1539 // the vector length.
1540 __ Ptrue(p15.VnB());
1541
1542 __ Dup(z0.VnD(), 0);
1543 __ Sqdecp(z0.VnD(), p15);
1544
1545 __ Dup(z1.VnS(), 0);
1546 __ Sqdecp(z1.VnS(), p15);
1547
1548 __ Dup(z2.VnH(), 0);
1549 __ Sqdecp(z2.VnH(), p15);
1550
1551 __ Dup(z3.VnD(), 0);
1552 __ Sqincp(z3.VnD(), p15);
1553
1554 __ Dup(z4.VnS(), 0);
1555 __ Sqincp(z4.VnS(), p15);
1556
1557 __ Dup(z5.VnH(), 0);
1558 __ Sqincp(z5.VnH(), p15);
1559
1560 END();
1561 if (CAN_RUN()) {
1562 RUN();
1563
1564 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1565 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1566 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1567
1568 for (int i = 0; i < d_lane_count; i++) {
1569 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1570 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1571 }
1572
1573 for (int i = 0; i < s_lane_count; i++) {
1574 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1575 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1576 }
1577
1578 for (int i = 0; i < h_lane_count; i++) {
1579 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1580 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1581 }
1582 }
1583}
1584
1585TEST_SVE(sve_uqinc_uqdec_p_vector) {
1586 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1587 START();
1588
1589 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1590 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1591 Initialise(&masm, p0.VnB(), p0_inputs);
1592
1593 // Check that saturation behaves correctly.
1594
1595 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1596 InsrHelper(&masm, z0.VnD(), z0_inputs);
1597
1598 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1599 InsrHelper(&masm, z1.VnD(), z1_inputs);
1600
1601 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1602 InsrHelper(&masm, z2.VnS(), z2_inputs);
1603
1604 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1605 InsrHelper(&masm, z3.VnH(), z3_inputs);
1606
1607 // The MacroAssembler implements non-destructive operations using movprfx.
1608 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1609 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1610 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1611 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1612
1613 __ Uqincp(z14.VnD(), p0, z0.VnD());
1614 __ Uqincp(z15.VnD(), p0, z1.VnD());
1615 __ Uqincp(z16.VnS(), p0, z2.VnS());
1616 __ Uqincp(z17.VnH(), p0, z3.VnH());
1617
1618 // Also test destructive forms.
1619 __ Mov(z4, z0);
1620 __ Mov(z5, z1);
1621 __ Mov(z6, z2);
1622 __ Mov(z7, z3);
1623
1624 __ Uqdecp(z0.VnD(), p0);
1625 __ Uqdecp(z1.VnD(), p0);
1626 __ Uqdecp(z2.VnS(), p0);
1627 __ Uqdecp(z3.VnH(), p0);
1628
1629 __ Uqincp(z4.VnD(), p0);
1630 __ Uqincp(z5.VnD(), p0);
1631 __ Uqincp(z6.VnS(), p0);
1632 __ Uqincp(z7.VnH(), p0);
1633
1634 END();
1635 if (CAN_RUN()) {
1636 RUN();
1637
1638 // z0_inputs[...] - number of active D lanes (2)
1639 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1640 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1641
1642 // z1_inputs[...] - number of active D lanes (2)
1643 uint64_t z1_expected[] = {0x12345678ffffff28,
1644 0,
1645 0xfffffffffffffffd,
1646 0x7ffffffffffffffd};
1647 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1648
1649 // z2_inputs[...] - number of active S lanes (3)
1650 uint32_t z2_expected[] =
1651 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1652 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1653
1654 // z3_inputs[...] - number of active H lanes (5)
1655 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1656 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1657
1658 // z0_inputs[...] + number of active D lanes (2)
1659 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1660 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1661
1662 // z1_inputs[...] + number of active D lanes (2)
1663 uint64_t z5_expected[] = {0x12345678ffffff2c,
1664 2,
1665 UINT64_MAX,
1666 0x8000000000000001};
1667 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1668
1669 // z2_inputs[...] + number of active S lanes (3)
1670 uint32_t z6_expected[] =
1671 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1672 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1673
1674 // z3_inputs[...] + number of active H lanes (5)
1675 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1676 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1677
1678 // Check that the non-destructive macros produced the same results.
1679 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1680 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1681 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1682 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1683 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1684 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1685 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1686 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1687 }
1688}
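// The unsigned forms clamp at the type bounds instead: 0 - 2 saturates to 0,
// and UINT64_MAX + 2 saturates to UINT64_MAX, as the expected values above
// assume. A minimal sketch for 64-bit lanes (illustrative only; the helper
// names are ours and the test does not use them):
inline uint64_t UnsignedSaturatingAdd64(uint64_t value, uint64_t delta) {
  return (value > (UINT64_MAX - delta)) ? UINT64_MAX : (value + delta);
}
inline uint64_t UnsignedSaturatingSub64(uint64_t value, uint64_t delta) {
  return (value < delta) ? 0 : (value - delta);
}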
1689
1690TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1692 START();
1693
1694 // With an all-true predicate, these instructions increment or decrement by
1695 // the vector length.
1696 __ Ptrue(p15.VnB());
1697
1698 __ Mov(x0, 0x1234567800000000);
1699 __ Mov(x1, 0x12340000);
1700 __ Mov(x2, 0x1200);
1701
1702 __ Dup(z0.VnD(), x0);
1703 __ Uqdecp(z0.VnD(), p15);
1704
1705 __ Dup(z1.VnS(), x1);
1706 __ Uqdecp(z1.VnS(), p15);
1707
1708 __ Dup(z2.VnH(), x2);
1709 __ Uqdecp(z2.VnH(), p15);
1710
1711 __ Dup(z3.VnD(), x0);
1712 __ Uqincp(z3.VnD(), p15);
1713
1714 __ Dup(z4.VnS(), x1);
1715 __ Uqincp(z4.VnS(), p15);
1716
1717 __ Dup(z5.VnH(), x2);
1718 __ Uqincp(z5.VnH(), p15);
1719
1720 END();
1721 if (CAN_RUN()) {
1722 RUN();
1723
1724 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1725 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1726 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1727
1728 for (int i = 0; i < d_lane_count; i++) {
1729 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1731 }
1732
1733 for (int i = 0; i < s_lane_count; i++) {
1734 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1736 }
1737
1738 for (int i = 0; i < h_lane_count; i++) {
1739 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1740 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1741 }
1742 }
1743}
1744
1745TEST_SVE(sve_index) {
1746 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1747 START();
1748
1749 // Simple cases.
1750 __ Index(z0.VnB(), 0, 1);
1751 __ Index(z1.VnH(), 1, 1);
1752 __ Index(z2.VnS(), 2, 1);
1753 __ Index(z3.VnD(), 3, 1);
1754
1755 // Synthesised immediates.
1756 __ Index(z4.VnB(), 42, -1);
1757 __ Index(z5.VnH(), -1, 42);
1758 __ Index(z6.VnS(), 42, 42);
1759
1760 // Register arguments.
1761 __ Mov(x0, 42);
1762 __ Mov(x1, -3);
1763 __ Index(z10.VnD(), x0, x1);
1764 __ Index(z11.VnB(), w0, w1);
1765 // The register size should correspond to the lane size, but VIXL allows any
1766 // register at least as big as the lane size.
1767 __ Index(z12.VnB(), x0, x1);
1768 __ Index(z13.VnH(), w0, x1);
1769 __ Index(z14.VnS(), x0, w1);
1770
1771 // Integer overflow.
1772 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1773 __ Index(z21.VnH(), 7, -3);
1774 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1775 __ Index(z23.VnD(), INT64_MIN + 6, -7);
1776
1777 END();
1778
1779 if (CAN_RUN()) {
1780 RUN();
1781
1782 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1783 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1784 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1785 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1786
1787 uint64_t b_mask = GetUintMask(kBRegSize);
1788 uint64_t h_mask = GetUintMask(kHRegSize);
1789 uint64_t s_mask = GetUintMask(kSRegSize);
1790 uint64_t d_mask = GetUintMask(kDRegSize);
1791
1792 // Simple cases.
1793 for (int i = 0; i < b_lane_count; i++) {
1794 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1795 }
1796 for (int i = 0; i < h_lane_count; i++) {
1797 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1798 }
1799 for (int i = 0; i < s_lane_count; i++) {
1800 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1801 }
1802 for (int i = 0; i < d_lane_count; i++) {
1803 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1804 }
1805
1806 // Synthesised immediates.
1807 for (int i = 0; i < b_lane_count; i++) {
1808 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1809 }
1810 for (int i = 0; i < h_lane_count; i++) {
1811 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1812 }
1813 for (int i = 0; i < s_lane_count; i++) {
1814 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1815 }
1816
1817 // Register arguments.
1818 for (int i = 0; i < d_lane_count; i++) {
1819 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1820 }
1821 for (int i = 0; i < b_lane_count; i++) {
1822 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1823 }
1824 for (int i = 0; i < b_lane_count; i++) {
1825 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1826 }
1827 for (int i = 0; i < h_lane_count; i++) {
1828 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1829 }
1830 for (int i = 0; i < s_lane_count; i++) {
1831 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1832 }
1833
1834 // Integer overflow.
1835 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1836 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1837 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1838 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1839 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1840 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1841 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1842 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1843 }
1844}
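// Index(zd, start, step) fills lane i with (start + i * step), truncated to
// the lane width; the masked expectations above all follow this pattern. A
// minimal sketch of the per-lane reference value (illustrative only; the
// helper name is ours): IndexLaneRefValue(42, -3, i, b_mask) reproduces the
// z11/z12 checks, for example.
inline uint64_t IndexLaneRefValue(int64_t start, int64_t step, int lane, uint64_t lane_mask) {
  return static_cast<uint64_t>(start + (lane * step)) & lane_mask;
}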
1845
1846TEST(sve_int_compare_count_and_limit_scalars) {
1847 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1848 START();
1849
1850 __ Mov(w20, 0xfffffffd);
1851 __ Mov(w21, 0xffffffff);
1852
1853 __ Whilele(p0.VnB(), w20, w21);
1854 __ Mrs(x0, NZCV);
1855 __ Whilele(p1.VnH(), w20, w21);
1856 __ Mrs(x1, NZCV);
1857
1858 __ Mov(w20, 0xffffffff);
1859 __ Mov(w21, 0x00000000);
1860
1861 __ Whilelt(p2.VnS(), w20, w21);
1862 __ Mrs(x2, NZCV);
1863 __ Whilelt(p3.VnD(), w20, w21);
1864 __ Mrs(x3, NZCV);
1865
1866 __ Mov(w20, 0xfffffffd);
1867 __ Mov(w21, 0xffffffff);
1868
1869 __ Whilels(p4.VnB(), w20, w21);
1870 __ Mrs(x4, NZCV);
1871 __ Whilels(p5.VnH(), w20, w21);
1872 __ Mrs(x5, NZCV);
1873
1874 __ Mov(w20, 0xffffffff);
1875 __ Mov(w21, 0x00000000);
1876
1877 __ Whilelo(p6.VnS(), w20, w21);
1878 __ Mrs(x6, NZCV);
1879 __ Whilelo(p7.VnD(), w20, w21);
1880 __ Mrs(x7, NZCV);
1881
1882 __ Mov(x20, 0xfffffffffffffffd);
1883 __ Mov(x21, 0xffffffffffffffff);
1884
1885 __ Whilele(p8.VnB(), x20, x21);
1886 __ Mrs(x8, NZCV);
1887 __ Whilele(p9.VnH(), x20, x21);
1888 __ Mrs(x9, NZCV);
1889
1890 __ Mov(x20, 0xffffffffffffffff);
1891 __ Mov(x21, 0x0000000000000000);
1892
1893 __ Whilelt(p10.VnS(), x20, x21);
1894 __ Mrs(x10, NZCV);
1895 __ Whilelt(p11.VnD(), x20, x21);
1896 __ Mrs(x11, NZCV);
1897
1898 __ Mov(x20, 0xfffffffffffffffd);
1899 __ Mov(x21, 0xffffffffffffffff);
1900
1901 __ Whilels(p12.VnB(), x20, x21);
1902 __ Mrs(x12, NZCV);
1903 __ Whilels(p13.VnH(), x20, x21);
1904 __ Mrs(x13, NZCV);
1905
1906 __ Mov(x20, 0xffffffffffffffff);
1907 __ Mov(x21, 0x0000000000000000);
1908
1909 __ Whilelo(p14.VnS(), x20, x21);
1910 __ Mrs(x14, NZCV);
1911 __ Whilelo(p15.VnD(), x20, x21);
1912 __ Mrs(x15, NZCV);
1913
1914 END();
1915
1916 if (CAN_RUN()) {
1917 RUN();
1918
1919 // 0b...00000000'00000111
1920 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1921 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1922
1923 // 0b...00000000'00010101
1924 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1925 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1926
1927 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1928 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1929
1930 int p3_expected[] = {0x00, 0x01};
1931 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1932
1933 // 0b...11111111'11111111
1934 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1935 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1936
1937 // 0b...01010101'01010101
1938 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1939 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1940
1941 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1942 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1943
1944 int p7_expected[] = {0x00, 0x00};
1945 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1946
1947 // 0b...00000000'00000111
1948 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1949 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1950
1951 // 0b...00000000'00010101
1952 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1953 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1954
1955 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1956 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1957
1958 int p11_expected[] = {0x00, 0x01};
1959 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1960
1961 // 0b...11111111'11111111
1962 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1963 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1964
1965 // 0b...01010101'01010101
1966 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1967 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1968
1969 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1970 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1971
1972 int p15_expected[] = {0x00, 0x00};
1973 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1974
1975 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1979 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1981 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1983 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1987 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1989 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1991 }
1992}
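// Worked example for the first case above (a sketch of the architectural
// behaviour, for illustration only): `whilele p0.b, w20, w21` with
// w20 = 0xfffffffd (-3) and w21 = 0xffffffff (-1) activates one lane per
// value of the incrementing counter that still satisfies counter <= limit
// (signed), i.e. for -3, -2 and -1. That gives three active B lanes
// (0b...0111), SVEFirstFlag because lane 0 is active, and SVENotLastFlag
// because the highest lane is not.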
1993
1994TEST(sve_int_compare_vectors_signed_imm) {
1995 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1996 START();
1997
1998 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
1999 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2000 InsrHelper(&masm, z13.VnB(), z13_inputs);
2001 Initialise(&masm, p0.VnB(), mask_inputs1);
2002
2003 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2004 __ Mrs(x2, NZCV);
2005 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2006
2007 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2008 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2009 InsrHelper(&masm, z14.VnH(), z14_inputs);
2010 Initialise(&masm, p0.VnH(), mask_inputs2);
2011
2012 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2013 __ Mrs(x4, NZCV);
2014 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2015
2016 int z15_inputs[] = {0, 1, -1, INT_MIN};
2017 int mask_inputs3[] = {0, 1, 1, 1};
2018 InsrHelper(&masm, z15.VnS(), z15_inputs);
2019 Initialise(&masm, p0.VnS(), mask_inputs3);
2020
2021 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2022 __ Mrs(x6, NZCV);
2023 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2024
2025 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2026 __ Mrs(x8, NZCV);
2027 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2028
2029 int64_t z16_inputs[] = {0, -1};
2030 int mask_inputs4[] = {1, 1};
2031 InsrHelper(&masm, z16.VnD(), z16_inputs);
2032 Initialise(&masm, p0.VnD(), mask_inputs4);
2033
2034 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2035 __ Mrs(x10, NZCV);
2036 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2037
2038 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2039 __ Mrs(x12, NZCV);
2040 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2041
2042 END();
2043
2044 if (CAN_RUN()) {
2045 RUN();
2046
2047 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2048 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2049
2050 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2051 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2052
2053 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2054 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2055
2056 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2057 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2058
2059 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2060 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2061
2062 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2063 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2064
2065 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2066 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2067
2068 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2069 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2070
2071 int p10_expected[] = {0x00, 0x01};
2072 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2073
2074 int p11_expected[] = {0x00, 0x00};
2075 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2076
2077 int p12_expected[] = {0x01, 0x00};
2078 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2079
2080 int p13_expected[] = {0x01, 0x01};
2081 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2082
2083 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2084 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2085 ASSERT_EQUAL_32(NoFlag, w6);
2086 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2088 ASSERT_EQUAL_32(NoFlag, w12);
2089 }
2090}
2091
2092TEST(sve_int_compare_vectors_unsigned_imm) {
2093 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2094 START();
2095
2096 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2097 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2098 InsrHelper(&masm, z13.VnB(), src1_inputs);
2099 Initialise(&masm, p0.VnB(), mask_inputs1);
2100
2101 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2102 __ Mrs(x2, NZCV);
2103 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2104
2105 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2106 int mask_inputs2[] = {1, 1, 1, 1, 0};
2107 InsrHelper(&masm, z13.VnH(), src2_inputs);
2108 Initialise(&masm, p0.VnH(), mask_inputs2);
2109
2110 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2111 __ Mrs(x4, NZCV);
2112 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2113
2114 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2115 int mask_inputs3[] = {1, 1, 1, 1};
2116 InsrHelper(&masm, z13.VnS(), src3_inputs);
2117 Initialise(&masm, p0.VnS(), mask_inputs3);
2118
2119 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2120 __ Mrs(x6, NZCV);
2121 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2122
2123 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2124 int mask_inputs4[] = {1, 1};
2125 InsrHelper(&masm, z13.VnD(), src4_inputs);
2126 Initialise(&masm, p0.VnD(), mask_inputs4);
2127
2128 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2129 __ Mrs(x8, NZCV);
2130 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2131
2132 END();
2133
2134 if (CAN_RUN()) {
2135 RUN();
2136
2137 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2138 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2139
2140 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2141 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2142
2143 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2144 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2145
2146 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2147 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2148
2149 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2150 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2151
2152 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2153 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2154
2155 int p8_expected[] = {0x00, 0x01};
2156 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2157
2158 int p9_expected[] = {0x00, 0x01};
2159 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2160
2161 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2162 ASSERT_EQUAL_32(NoFlag, w4);
2163 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2165 }
2166}
2167
2168TEST(sve_int_compare_conditionally_terminate_scalars) {
2169 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2170 START();
2171
2172 __ Mov(x0, 0xfedcba9887654321);
2173 __ Mov(x1, 0x1000100010001000);
2174
2175 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2176 // !C if the condition does not hold.
2177 __ Mov(x10, NoFlag);
2178 __ Msr(NZCV, x10);
2179
2180 __ Ctermeq(w0, w0);
2181 __ Mrs(x2, NZCV);
2182 __ Ctermeq(x0, x1);
2183 __ Mrs(x3, NZCV);
2184 __ Ctermne(x0, x0);
2185 __ Mrs(x4, NZCV);
2186 __ Ctermne(w0, w1);
2187 __ Mrs(x5, NZCV);
2188
2189 // As above, but with all flags initially set.
2190 __ Mov(x10, NZCVFlag);
2191 __ Msr(NZCV, x10);
2192
2193 __ Ctermeq(w0, w0);
2194 __ Mrs(x6, NZCV);
2195 __ Ctermeq(x0, x1);
2196 __ Mrs(x7, NZCV);
2197 __ Ctermne(x0, x0);
2198 __ Mrs(x8, NZCV);
2199 __ Ctermne(w0, w1);
2200 __ Mrs(x9, NZCV);
2201
2202 END();
2203
2204 if (CAN_RUN()) {
2205 RUN();
2206
2207 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2208 ASSERT_EQUAL_32(VFlag, w3);
2209 ASSERT_EQUAL_32(VFlag, w4);
2210 ASSERT_EQUAL_32(SVEFirstFlag, w5);
2211
2212 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2213 ASSERT_EQUAL_32(ZCFlag, w7);
2214 ASSERT_EQUAL_32(ZCFlag, w8);
2215 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
2216 }
2217}
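// A minimal reference sketch of the ctermeq/ctermne flag behaviour checked
// above (illustrative only; the helper and bit masks below are ours, not
// VIXL API): Z and C are left unchanged, N reports whether the termination
// condition held, and V is set to !C only when it did not hold.
inline uint32_t CtermRefFlags(uint32_t old_nzcv, bool condition_holds) {
  const uint32_t n_bit = 0x80000000;  // N is bit 31 of NZCV.
  const uint32_t c_bit = 0x20000000;  // C is bit 29.
  const uint32_t v_bit = 0x10000000;  // V is bit 28.
  uint32_t nzcv = old_nzcv & ~(n_bit | v_bit);  // Z and C are preserved.
  if (condition_holds) {
    nzcv |= n_bit;
  } else if ((old_nzcv & c_bit) == 0) {
    nzcv |= v_bit;
  }
  return nzcv;
}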
2218
2219// Work out what the architectural `PredTest` pseudocode should produce for the
2220// given result and governing predicate.
2221template <typename Tg, typename Td, int N>
2222static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2223 const Tg (&pg)[N],
2224 int vl) {
2225 int first = -1;
2226 int last = -1;
2227 bool any_active = false;
2228
2229 // Only consider potentially-active lanes.
2230 int start = (N > vl) ? (N - vl) : 0;
2231 for (int i = start; i < N; i++) {
2232 if ((pg[i] & 1) == 1) {
2233 // Look for the first and last active lanes.
2234 // Note that the 'first' lane is the one with the highest index.
2235 if (last < 0) last = i;
2236 first = i;
2237 // Look for any active lanes that are also active in pd.
2238 if ((pd[i] & 1) == 1) any_active = true;
2239 }
2240 }
2241
2242 uint32_t flags = 0;
2243 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2244 if (!any_active) flags |= SVENoneFlag;
2245 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2246 return static_cast<StatusFlags>(flags);
2247}
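// Worked example (illustration only): with pg = {1, 0, 1} and pd = {0, 0, 1}
// (and vl >= 3), the 'first' active lane of pg (the highest array index) is
// also active in pd, but the 'last' one (the lowest array index) is not, so
// this helper returns SVEFirstFlag | SVENotLastFlag. The tests below compare
// NZCV against exactly this kind of result.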
2248
2249typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2250 const PRegister& pg,
2251 const PRegisterWithLaneSize& pn);
2252template <typename Tg, typename Tn, typename Td>
2253static void PfirstPnextHelper(Test* config,
2254 PfirstPnextFn macro,
2255 unsigned lane_size_in_bits,
2256 const Tg& pg_inputs,
2257 const Tn& pn_inputs,
2258 const Td& pd_expected) {
2259 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2260 START();
2261
2262 PRegister pg = p15;
2263 PRegister pn = p14;
2264 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2265 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2266
2267 // Initialise NZCV to an impossible value, to check that we actually write it.
2268 __ Mov(x10, NZCVFlag);
2269
2270 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2271 // the Assembler.
2272 __ Msr(NZCV, x10);
2273 __ Mov(p0, pn);
2274 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2275 pg,
2276 p0.WithLaneSize(lane_size_in_bits));
2277 __ Mrs(x0, NZCV);
2278
2279 // The MacroAssembler supports non-destructive use.
2280 __ Msr(NZCV, x10);
2281 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2282 pg,
2283 pn.WithLaneSize(lane_size_in_bits));
2284 __ Mrs(x1, NZCV);
2285
2286 // If pd.Aliases(pg) the macro requires a scratch register.
2287 {
2288 UseScratchRegisterScope temps(&masm);
2289 temps.Include(p13);
2290 __ Msr(NZCV, x10);
2291 __ Mov(p2, p15);
2292 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2293 p2,
2294 pn.WithLaneSize(lane_size_in_bits));
2295 __ Mrs(x2, NZCV);
2296 }
2297
2298 END();
2299
2300 if (CAN_RUN()) {
2301 RUN();
2302
2303 // Check that the inputs weren't modified.
2304 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2305 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2306
2307 // Check the primary operation.
2308 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2309 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2311
2312 // Check that the flags were properly set.
2313 StatusFlags nzcv_expected =
2314 GetPredTestFlags(pd_expected,
2315 pg_inputs,
2316 core.GetSVELaneCount(kBRegSize));
2317 ASSERT_EQUAL_64(nzcv_expected, x0);
2318 ASSERT_EQUAL_64(nzcv_expected, x1);
2319 ASSERT_EQUAL_64(nzcv_expected, x2);
2320 }
2321}
2322
2323template <typename Tg, typename Tn, typename Td>
2324static void PfirstHelper(Test* config,
2325 const Tg& pg_inputs,
2326 const Tn& pn_inputs,
2327 const Td& pd_expected) {
2328 PfirstPnextHelper(config,
2329 &MacroAssembler::Pfirst,
2330 kBRegSize, // pfirst only accepts B-sized lanes.
2331 pg_inputs,
2332 pn_inputs,
2333 pd_expected);
2334}
2335
2336template <typename Tg, typename Tn, typename Td>
2337static void PnextHelper(Test* config,
2338 unsigned lane_size_in_bits,
2339 const Tg& pg_inputs,
2340 const Tn& pn_inputs,
2341 const Td& pd_expected) {
2342 PfirstPnextHelper(config,
2343 &MacroAssembler::Pnext,
2344 lane_size_in_bits,
2345 pg_inputs,
2346 pn_inputs,
2347 pd_expected);
2348}
2349
2350TEST_SVE(sve_pfirst) {
2351 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2352 // large VL), but few enough to make the test easy to read.
2353 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2354 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2355 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2356 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2357 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2358 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2359
2360 // Pfirst finds the first active lane in pg, and activates the corresponding
2361 // lane in pn (if it isn't already active).
2362
2363 // The first active lane in in1 is here. |
2364 // v
2365 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2366 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2367 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2368 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2369 PfirstHelper(config, in1, in0, exp10);
2370 PfirstHelper(config, in1, in2, exp12);
2371 PfirstHelper(config, in1, in3, exp13);
2372 PfirstHelper(config, in1, in4, exp14);
2373
2374 // The first active lane in in2 is here. |
2375 // v
2376 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2377 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2378 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2379 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2380 PfirstHelper(config, in2, in0, exp20);
2381 PfirstHelper(config, in2, in1, exp21);
2382 PfirstHelper(config, in2, in3, exp23);
2383 PfirstHelper(config, in2, in4, exp24);
2384
2385 // The first active lane in in3 is here. |
2386 // v
2387 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2388 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2389 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2390 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2391 PfirstHelper(config, in3, in0, exp30);
2392 PfirstHelper(config, in3, in1, exp31);
2393 PfirstHelper(config, in3, in2, exp32);
2394 PfirstHelper(config, in3, in4, exp34);
2395
2396 // | The first active lane in in4 is here.
2397 // v
2398 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2399 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2400 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2401 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2402 PfirstHelper(config, in4, in0, exp40);
2403 PfirstHelper(config, in4, in1, exp41);
2404 PfirstHelper(config, in4, in2, exp42);
2405 PfirstHelper(config, in4, in3, exp43);
2406
2407 // If pg is all inactive, the input is passed through unchanged.
2408 PfirstHelper(config, in0, in0, in0);
2409 PfirstHelper(config, in0, in1, in1);
2410 PfirstHelper(config, in0, in2, in2);
2411 PfirstHelper(config, in0, in3, in3);
2412
2413 // If the values of pg and pn match, the value is passed through unchanged.
2414 PfirstHelper(config, in0, in0, in0);
2415 PfirstHelper(config, in1, in1, in1);
2416 PfirstHelper(config, in2, in2, in2);
2417 PfirstHelper(config, in3, in3, in3);
2418}
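// A minimal reference sketch of the pfirst rule exercised above (illustrative
// only; the helper is ours and is not used by the tests). Lane 0 corresponds
// to the last element of the initialiser lists used in these tests.
inline void PfirstRefModel(const int* pg, const int* pn, int* pd, int lane_count) {
  for (int i = 0; i < lane_count; i++) pd[i] = pn[i] & 1;
  for (int i = 0; i < lane_count; i++) {
    if ((pg[i] & 1) == 1) {
      pd[i] = 1;  // Activate the first (lowest-numbered) active lane of pg.
      break;
    }
  }
}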
2419
2420TEST_SVE(sve_pfirst_alias) {
2421 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2422 START();
2423
2424 // Check that the Simulator behaves correctly when all arguments are aliased.
2425 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2426 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2427 int in_s[] = {0, 1, 1, 0};
2428 int in_d[] = {1, 1};
2429
2430 Initialise(&masm, p0.VnB(), in_b);
2431 Initialise(&masm, p1.VnH(), in_h);
2432 Initialise(&masm, p2.VnS(), in_s);
2433 Initialise(&masm, p3.VnD(), in_d);
2434
2435 // Initialise NZCV to an impossible value, to check that we actually write it.
2436 __ Mov(x10, NZCVFlag);
2437
2438 __ Msr(NZCV, x10);
2439 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2440 __ Mrs(x0, NZCV);
2441
2442 __ Msr(NZCV, x10);
2443 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2444 __ Mrs(x1, NZCV);
2445
2446 __ Msr(NZCV, x10);
2447 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2448 __ Mrs(x2, NZCV);
2449
2450 __ Msr(NZCV, x10);
2451 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2452 __ Mrs(x3, NZCV);
2453
2454 END();
2455
2456 if (CAN_RUN()) {
2457 RUN();
2458
2459 // The first lane from pg is already active in pdn, so the P register should
2460 // be unchanged.
2461 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2462 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2463 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2464 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2465
2466 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2467 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2470 }
2471}
2472
2473TEST_SVE(sve_pnext_b) {
2474 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2475 // (to check propagation if we have a large VL), but few enough to make the
2476 // test easy to read.
2477 // For now, we just use kPRegMinSize so that the test works anywhere.
2478 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2480 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2481 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2482 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2483
2484 // Pnext activates the next element that is true in pg, after the last-active
2485 // element in pn. If all pn elements are false (as in in0), it starts looking
2486 // at element 0.
2487
2488 // There are no active lanes in in0, so the result is simply the first active
2489 // lane from pg.
2490 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2491 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2492 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2493 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2494 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495
2496 // The last active lane in in1 is here. |
2497 // v
2498 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2499 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503
2504 // | The last active lane in in2 is here.
2505 // v
2506 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2507 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511
2512 // | The last active lane in in3 is here.
2513 // v
2514 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2515 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519
2520 // | The last active lane in in4 is here.
2521 // v
2522 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2523 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527
2528 PnextHelper(config, kBRegSize, in0, in0, exp00);
2529 PnextHelper(config, kBRegSize, in1, in0, exp10);
2530 PnextHelper(config, kBRegSize, in2, in0, exp20);
2531 PnextHelper(config, kBRegSize, in3, in0, exp30);
2532 PnextHelper(config, kBRegSize, in4, in0, exp40);
2533
2534 PnextHelper(config, kBRegSize, in0, in1, exp01);
2535 PnextHelper(config, kBRegSize, in1, in1, exp11);
2536 PnextHelper(config, kBRegSize, in2, in1, exp21);
2537 PnextHelper(config, kBRegSize, in3, in1, exp31);
2538 PnextHelper(config, kBRegSize, in4, in1, exp41);
2539
2540 PnextHelper(config, kBRegSize, in0, in2, exp02);
2541 PnextHelper(config, kBRegSize, in1, in2, exp12);
2542 PnextHelper(config, kBRegSize, in2, in2, exp22);
2543 PnextHelper(config, kBRegSize, in3, in2, exp32);
2544 PnextHelper(config, kBRegSize, in4, in2, exp42);
2545
2546 PnextHelper(config, kBRegSize, in0, in3, exp03);
2547 PnextHelper(config, kBRegSize, in1, in3, exp13);
2548 PnextHelper(config, kBRegSize, in2, in3, exp23);
2549 PnextHelper(config, kBRegSize, in3, in3, exp33);
2550 PnextHelper(config, kBRegSize, in4, in3, exp43);
2551
2552 PnextHelper(config, kBRegSize, in0, in4, exp04);
2553 PnextHelper(config, kBRegSize, in1, in4, exp14);
2554 PnextHelper(config, kBRegSize, in2, in4, exp24);
2555 PnextHelper(config, kBRegSize, in3, in4, exp34);
2556 PnextHelper(config, kBRegSize, in4, in4, exp44);
2557}
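// A minimal reference sketch of the pnext rule exercised by these tests
// (illustrative only; the helper is ours). Lane 0 corresponds to the last
// element of the initialiser lists above, and only the lowest bit of each
// lane field is considered.
inline int PnextRefLane(const int* pg, const int* pn, int lane_count) {
  int last_active_pn = -1;
  for (int i = 0; i < lane_count; i++) {
    if ((pn[i] & 1) == 1) last_active_pn = i;
  }
  // The result has at most one active lane: the next active lane of pg after
  // the last active lane of pn, or none at all (returned here as -1).
  for (int i = last_active_pn + 1; i < lane_count; i++) {
    if ((pg[i] & 1) == 1) return i;
  }
  return -1;
}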
2558
2559TEST_SVE(sve_pnext_h) {
2560 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2561 // (to check propagation if we have a large VL), but few enough to make the
2562 // test easy to read.
2563 // For now, we just use kPRegMinSize so that the test works anywhere.
2564 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2565 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2566 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2567 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2568 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2569
2570 // Pnext activates the next element that is true in pg, after the last-active
2571 // element in pn. If all pn elements are false (as in in0), it starts looking
2572 // at element 0.
2573 //
2574 // As for other SVE instructions, elements are only considered to be active if
2575 // the _first_ bit in each field is one. Other bits are ignored.
2576
2577 // There are no active lanes in in0, so the result is simply the first active
2578 // lane from pg.
2579 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2580 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2581 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2582 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2583 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2584
2585 // | The last active lane in in1 is here.
2586 // v
2587 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2588 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2590 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2592
2593 // | The last active lane in in2 is here.
2594 // v
2595 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2596 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2600
2601 // | The last active lane in in3 is here.
2602 // v
2603 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2604 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2606 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2607 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2608
2609 // | The last active lane in in4 is here.
2610 // v
2611 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2612 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616
2617 PnextHelper(config, kHRegSize, in0, in0, exp00);
2618 PnextHelper(config, kHRegSize, in1, in0, exp10);
2619 PnextHelper(config, kHRegSize, in2, in0, exp20);
2620 PnextHelper(config, kHRegSize, in3, in0, exp30);
2621 PnextHelper(config, kHRegSize, in4, in0, exp40);
2622
2623 PnextHelper(config, kHRegSize, in0, in1, exp01);
2624 PnextHelper(config, kHRegSize, in1, in1, exp11);
2625 PnextHelper(config, kHRegSize, in2, in1, exp21);
2626 PnextHelper(config, kHRegSize, in3, in1, exp31);
2627 PnextHelper(config, kHRegSize, in4, in1, exp41);
2628
2629 PnextHelper(config, kHRegSize, in0, in2, exp02);
2630 PnextHelper(config, kHRegSize, in1, in2, exp12);
2631 PnextHelper(config, kHRegSize, in2, in2, exp22);
2632 PnextHelper(config, kHRegSize, in3, in2, exp32);
2633 PnextHelper(config, kHRegSize, in4, in2, exp42);
2634
2635 PnextHelper(config, kHRegSize, in0, in3, exp03);
2636 PnextHelper(config, kHRegSize, in1, in3, exp13);
2637 PnextHelper(config, kHRegSize, in2, in3, exp23);
2638 PnextHelper(config, kHRegSize, in3, in3, exp33);
2639 PnextHelper(config, kHRegSize, in4, in3, exp43);
2640
2641 PnextHelper(config, kHRegSize, in0, in4, exp04);
2642 PnextHelper(config, kHRegSize, in1, in4, exp14);
2643 PnextHelper(config, kHRegSize, in2, in4, exp24);
2644 PnextHelper(config, kHRegSize, in3, in4, exp34);
2645 PnextHelper(config, kHRegSize, in4, in4, exp44);
2646}
2647
2648TEST_SVE(sve_pnext_s) {
2649 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2650 // (to check propagation if we have a large VL), but few enough to make the
2651 // test easy to read.
2652 // For now, we just use kPRegMinSize so that the test works anywhere.
2653 int in0[] = {0xe, 0xc, 0x8, 0x0};
2654 int in1[] = {0x0, 0x2, 0x0, 0x1};
2655 int in2[] = {0x0, 0x1, 0xf, 0x0};
2656 int in3[] = {0xf, 0x0, 0x0, 0x0};
2657
2658 // Pnext activates the next element that is true in pg, after the last-active
2659 // element in pn. If all pn elements are false (as in in0), it starts looking
2660 // at element 0.
2661 //
2662 // As for other SVE instructions, elements are only considered to be active if
2663 // the _first_ bit in each field is one. Other bits are ignored.
2664
2665 // There are no active lanes in in0, so the result is simply the first active
2666 // lane from pg.
2667 int exp00[] = {0, 0, 0, 0};
2668 int exp10[] = {0, 0, 0, 1};
2669 int exp20[] = {0, 0, 1, 0};
2670 int exp30[] = {1, 0, 0, 0};
2671
2672 // | The last active lane in in1 is here.
2673 // v
2674 int exp01[] = {0, 0, 0, 0};
2675 int exp11[] = {0, 0, 0, 0};
2676 int exp21[] = {0, 0, 1, 0};
2677 int exp31[] = {1, 0, 0, 0};
2678
2679 // | The last active lane in in2 is here.
2680 // v
2681 int exp02[] = {0, 0, 0, 0};
2682 int exp12[] = {0, 0, 0, 0};
2683 int exp22[] = {0, 0, 0, 0};
2684 int exp32[] = {1, 0, 0, 0};
2685
2686 // | The last active lane in in3 is here.
2687 // v
2688 int exp03[] = {0, 0, 0, 0};
2689 int exp13[] = {0, 0, 0, 0};
2690 int exp23[] = {0, 0, 0, 0};
2691 int exp33[] = {0, 0, 0, 0};
2692
2693 PnextHelper(config, kSRegSize, in0, in0, exp00);
2694 PnextHelper(config, kSRegSize, in1, in0, exp10);
2695 PnextHelper(config, kSRegSize, in2, in0, exp20);
2696 PnextHelper(config, kSRegSize, in3, in0, exp30);
2697
2698 PnextHelper(config, kSRegSize, in0, in1, exp01);
2699 PnextHelper(config, kSRegSize, in1, in1, exp11);
2700 PnextHelper(config, kSRegSize, in2, in1, exp21);
2701 PnextHelper(config, kSRegSize, in3, in1, exp31);
2702
2703 PnextHelper(config, kSRegSize, in0, in2, exp02);
2704 PnextHelper(config, kSRegSize, in1, in2, exp12);
2705 PnextHelper(config, kSRegSize, in2, in2, exp22);
2706 PnextHelper(config, kSRegSize, in3, in2, exp32);
2707
2708 PnextHelper(config, kSRegSize, in0, in3, exp03);
2709 PnextHelper(config, kSRegSize, in1, in3, exp13);
2710 PnextHelper(config, kSRegSize, in2, in3, exp23);
2711 PnextHelper(config, kSRegSize, in3, in3, exp33);
2712}
2713
2714TEST_SVE(sve_pnext_d) {
2715 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2716 // (to check propagation if we have a large VL), but few enough to make the
2717 // test easy to read.
2718 // For now, we just use kPRegMinSize so that the test works anywhere.
2719 int in0[] = {0xfe, 0xf0};
2720 int in1[] = {0x00, 0x55};
2721 int in2[] = {0x33, 0xff};
2722
2723 // Pnext activates the next element that is true in pg, after the last-active
2724 // element in pn. If all pn elements are false (as in in0), it starts looking
2725 // at element 0.
2726 //
2727 // As for other SVE instructions, elements are only considered to be active if
2728 // the _first_ bit in each field is one. Other bits are ignored.
2729
2730 // There are no active lanes in in0, so the result is simply the first active
2731 // lane from pg.
2732 int exp00[] = {0, 0};
2733 int exp10[] = {0, 1};
2734 int exp20[] = {0, 1};
2735
2736 // | The last active lane in in1 is here.
2737 // v
2738 int exp01[] = {0, 0};
2739 int exp11[] = {0, 0};
2740 int exp21[] = {1, 0};
2741
2742 // | The last active lane in in2 is here.
2743 // v
2744 int exp02[] = {0, 0};
2745 int exp12[] = {0, 0};
2746 int exp22[] = {0, 0};
2747
2748 PnextHelper(config, kDRegSize, in0, in0, exp00);
2749 PnextHelper(config, kDRegSize, in1, in0, exp10);
2750 PnextHelper(config, kDRegSize, in2, in0, exp20);
2751
2752 PnextHelper(config, kDRegSize, in0, in1, exp01);
2753 PnextHelper(config, kDRegSize, in1, in1, exp11);
2754 PnextHelper(config, kDRegSize, in2, in1, exp21);
2755
2756 PnextHelper(config, kDRegSize, in0, in2, exp02);
2757 PnextHelper(config, kDRegSize, in1, in2, exp12);
2758 PnextHelper(config, kDRegSize, in2, in2, exp22);
2759}
2760
2761TEST_SVE(sve_pnext_alias) {
2762 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2763 START();
2764
2765 // Check that the Simulator behaves correctly when all arguments are aliased.
2766 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2767 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2768 int in_s[] = {0, 1, 1, 0};
2769 int in_d[] = {1, 1};
2770
2771 Initialise(&masm, p0.VnB(), in_b);
2772 Initialise(&masm, p1.VnH(), in_h);
2773 Initialise(&masm, p2.VnS(), in_s);
2774 Initialise(&masm, p3.VnD(), in_d);
2775
2776 // Initialise NZCV to an impossible value, to check that we actually write it.
2777 __ Mov(x10, NZCVFlag);
2778
2779 __ Msr(NZCV, x10);
2780 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2781 __ Mrs(x0, NZCV);
2782
2783 __ Msr(NZCV, x10);
2784 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2785 __ Mrs(x1, NZCV);
2786
2787 __ Msr(NZCV, x10);
2788 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2789 __ Mrs(x2, NZCV);
2790
2791 __ Msr(NZCV, x10);
2792 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2793 __ Mrs(x3, NZCV);
2794
2795 END();
2796
2797 if (CAN_RUN()) {
2798 RUN();
2799
2800 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2801 // active lane in pdn, so the result should always be zero.
2802 ASSERT_EQUAL_SVE(0, p0.VnB());
2803 ASSERT_EQUAL_SVE(0, p1.VnH());
2804 ASSERT_EQUAL_SVE(0, p2.VnS());
2805 ASSERT_EQUAL_SVE(0, p3.VnD());
2806
2807 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2811 }
2812}
2813
2814static void PtrueHelper(Test* config,
2815 unsigned lane_size_in_bits,
2816 FlagsUpdate s = LeaveFlags) {
2817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2818 START();
2819
2820 PRegisterWithLaneSize p[kNumberOfPRegisters];
2821 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2822 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2823 }
2824
2825 // Initialise NZCV to an impossible value, to check that we actually write it.
2826 StatusFlags nzcv_unmodified = NZCVFlag;
2827 __ Mov(x20, nzcv_unmodified);
2828
2829 // We don't have enough registers to conveniently test every pattern, so take
2830 // samples from each group.
2831 __ Msr(NZCV, x20);
2832 __ Ptrue(p[0], SVE_POW2, s);
2833 __ Mrs(x0, NZCV);
2834
2835 __ Msr(NZCV, x20);
2836 __ Ptrue(p[1], SVE_VL1, s);
2837 __ Mrs(x1, NZCV);
2838
2839 __ Msr(NZCV, x20);
2840 __ Ptrue(p[2], SVE_VL2, s);
2841 __ Mrs(x2, NZCV);
2842
2843 __ Msr(NZCV, x20);
2844 __ Ptrue(p[3], SVE_VL5, s);
2845 __ Mrs(x3, NZCV);
2846
2847 __ Msr(NZCV, x20);
2848 __ Ptrue(p[4], SVE_VL6, s);
2849 __ Mrs(x4, NZCV);
2850
2851 __ Msr(NZCV, x20);
2852 __ Ptrue(p[5], SVE_VL8, s);
2853 __ Mrs(x5, NZCV);
2854
2855 __ Msr(NZCV, x20);
2856 __ Ptrue(p[6], SVE_VL16, s);
2857 __ Mrs(x6, NZCV);
2858
2859 __ Msr(NZCV, x20);
2860 __ Ptrue(p[7], SVE_VL64, s);
2861 __ Mrs(x7, NZCV);
2862
2863 __ Msr(NZCV, x20);
2864 __ Ptrue(p[8], SVE_VL256, s);
2865 __ Mrs(x8, NZCV);
2866
2867 {
2868 // We have to use the Assembler to use values not defined by
2869 // SVEPredicateConstraint, so call `ptrues` directly..
2870 typedef void (
2871 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2872 int pattern);
2873 AssemblePtrueFn assemble =
2874 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
2875
2876 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2877 __ msr(NZCV, x20);
2878 (masm.*assemble)(p[9], 0xe);
2879 __ mrs(x9, NZCV);
2880
2881 __ msr(NZCV, x20);
2882 (masm.*assemble)(p[10], 0x16);
2883 __ mrs(x10, NZCV);
2884
2885 __ msr(NZCV, x20);
2886 (masm.*assemble)(p[11], 0x1a);
2887 __ mrs(x11, NZCV);
2888
2889 __ msr(NZCV, x20);
2890 (masm.*assemble)(p[12], 0x1c);
2891 __ mrs(x12, NZCV);
2892 }
2893
2894 __ Msr(NZCV, x20);
2895 __ Ptrue(p[13], SVE_MUL4, s);
2896 __ Mrs(x13, NZCV);
2897
2898 __ Msr(NZCV, x20);
2899 __ Ptrue(p[14], SVE_MUL3, s);
2900 __ Mrs(x14, NZCV);
2901
2902 __ Msr(NZCV, x20);
2903 __ Ptrue(p[15], SVE_ALL, s);
2904 __ Mrs(x15, NZCV);
2905
2906 END();
2907
2908 if (CAN_RUN()) {
2909 RUN();
2910
2911 int all = core.GetSVELaneCount(lane_size_in_bits);
2912 int pow2 = 1 << HighestSetBitPosition(all);
2913 int mul4 = all - (all % 4);
2914 int mul3 = all - (all % 3);
2915
2916 // Check P register results.
2917 for (int i = 0; i < all; i++) {
2918 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2919 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2927 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2931 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2933 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2934 }
2935
2936 // Check NZCV results.
2937 if (s == LeaveFlags) {
2938 // No flags should have been updated.
2939 for (int i = 0; i <= 15; i++) {
2940 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2941 }
2942 } else {
2943 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2944 StatusFlags nonzero = SVEFirstFlag;
2945
2946 // POW2
2947 ASSERT_EQUAL_64(nonzero, x0);
2948 // VL*
2949 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2950 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2951 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2952 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2953 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2954 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2955 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2956 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2957 // #uimm5
2958 ASSERT_EQUAL_64(zero, x9);
2959 ASSERT_EQUAL_64(zero, x10);
2960 ASSERT_EQUAL_64(zero, x11);
2961 ASSERT_EQUAL_64(zero, x12);
2962 // MUL*
2963 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2964 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2965 // ALL
2966 ASSERT_EQUAL_64(nonzero, x15);
2967 }
2968 }
2969}
2970
Jacob Bramleye8289202019-07-31 11:25:23 +01002971TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2972TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002975
Jacob Bramleye8289202019-07-31 11:25:23 +01002976TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2977TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
Jacob Bramley0ce75842019-07-17 18:12:50 +01002980
Jacob Bramleye8289202019-07-31 11:25:23 +01002981TEST_SVE(sve_pfalse) {
2982 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002983 START();
2984
2985 // Initialise non-zero inputs.
2986 __ Ptrue(p0.VnB());
2987 __ Ptrue(p1.VnH());
2988 __ Ptrue(p2.VnS());
2989 __ Ptrue(p3.VnD());
2990
2991 // The instruction only supports B-sized lanes, but the lane size has no
2992 // logical effect, so the MacroAssembler accepts anything.
2993 __ Pfalse(p0.VnB());
2994 __ Pfalse(p1.VnH());
2995 __ Pfalse(p2.VnS());
2996 __ Pfalse(p3.VnD());
2997
2998 END();
2999
3000 if (CAN_RUN()) {
3001 RUN();
3002
3003 ASSERT_EQUAL_SVE(0, p0.VnB());
3004 ASSERT_EQUAL_SVE(0, p1.VnB());
3005 ASSERT_EQUAL_SVE(0, p2.VnB());
3006 ASSERT_EQUAL_SVE(0, p3.VnB());
3007 }
3008}
3009
Jacob Bramleye8289202019-07-31 11:25:23 +01003010TEST_SVE(sve_ptest) {
3011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003012 START();
3013
3014 // Initialise NZCV to a known (impossible) value.
3015 StatusFlags nzcv_unmodified = NZCVFlag;
3016 __ Mov(x0, nzcv_unmodified);
3017 __ Msr(NZCV, x0);
3018
3019 // Construct some test inputs.
3020 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3021 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3022 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3023 __ Pfalse(p0.VnB());
3024 __ Ptrue(p1.VnB());
3025 Initialise(&masm, p2.VnB(), in2);
3026 Initialise(&masm, p3.VnB(), in3);
3027 Initialise(&masm, p4.VnB(), in4);
3028
3029 // All-inactive pg.
3030 __ Ptest(p0, p0.VnB());
3031 __ Mrs(x0, NZCV);
3032 __ Ptest(p0, p1.VnB());
3033 __ Mrs(x1, NZCV);
3034 __ Ptest(p0, p2.VnB());
3035 __ Mrs(x2, NZCV);
3036 __ Ptest(p0, p3.VnB());
3037 __ Mrs(x3, NZCV);
3038 __ Ptest(p0, p4.VnB());
3039 __ Mrs(x4, NZCV);
3040
3041 // All-active pg.
3042 __ Ptest(p1, p0.VnB());
3043 __ Mrs(x5, NZCV);
3044 __ Ptest(p1, p1.VnB());
3045 __ Mrs(x6, NZCV);
3046 __ Ptest(p1, p2.VnB());
3047 __ Mrs(x7, NZCV);
3048 __ Ptest(p1, p3.VnB());
3049 __ Mrs(x8, NZCV);
3050 __ Ptest(p1, p4.VnB());
3051 __ Mrs(x9, NZCV);
3052
3053 // Combinations of other inputs.
3054 __ Ptest(p2, p2.VnB());
3055 __ Mrs(x20, NZCV);
3056 __ Ptest(p2, p3.VnB());
3057 __ Mrs(x21, NZCV);
3058 __ Ptest(p2, p4.VnB());
3059 __ Mrs(x22, NZCV);
3060 __ Ptest(p3, p2.VnB());
3061 __ Mrs(x23, NZCV);
3062 __ Ptest(p3, p3.VnB());
3063 __ Mrs(x24, NZCV);
3064 __ Ptest(p3, p4.VnB());
3065 __ Mrs(x25, NZCV);
3066 __ Ptest(p4, p2.VnB());
3067 __ Mrs(x26, NZCV);
3068 __ Ptest(p4, p3.VnB());
3069 __ Mrs(x27, NZCV);
3070 __ Ptest(p4, p4.VnB());
3071 __ Mrs(x28, NZCV);
3072
3073 END();
3074
3075 if (CAN_RUN()) {
3076 RUN();
3077
3078 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
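    // Reminder of the SVE flag encoding these aliases assume: N ("first") is
    // set if the first active lane is true, Z ("none") if no active lane is
    // true, and C ("!last") if the last active lane is false; V is zero.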
3079
3080 // If pg is all inactive, the value of pn is irrelevant.
3081 ASSERT_EQUAL_64(zero, x0);
3082 ASSERT_EQUAL_64(zero, x1);
3083 ASSERT_EQUAL_64(zero, x2);
3084 ASSERT_EQUAL_64(zero, x3);
3085 ASSERT_EQUAL_64(zero, x4);
3086
3087 // All-active pg.
3088 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3089 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3090 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3091 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3092 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3094
3095 // Other inputs.
3096 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3097 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3098 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3099 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3100 x23); // pg: in3, pn: in2
3101 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3102 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3103 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3104 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3105 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3106 }
3107}
3108
Jacob Bramleye8289202019-07-31 11:25:23 +01003109TEST_SVE(sve_cntp) {
3110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003111 START();
3112
3113 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3114 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3115 Initialise(&masm, p0.VnB(), p0_inputs);
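  // For wider lanes only the lowest bit of each lane-sized field of the
  // predicate is significant, which is why the same register holds 7, 5, 2
  // and 1 active B, H, S and D lanes respectively.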
3116
3117 // With an all-true predicate, these instructions measure the vector length.
3118 __ Ptrue(p10.VnB());
3119 __ Ptrue(p11.VnH());
3120 __ Ptrue(p12.VnS());
3121 __ Ptrue(p13.VnD());
3122
3123 // `ptrue p10.b` provides an all-active pg.
3124 __ Cntp(x10, p10, p10.VnB());
3125 __ Cntp(x11, p10, p11.VnH());
3126 __ Cntp(x12, p10, p12.VnS());
3127 __ Cntp(x13, p10, p13.VnD());
3128
3129 // Check that the predicate mask is applied properly.
3130 __ Cntp(x14, p10, p10.VnB());
3131 __ Cntp(x15, p11, p10.VnB());
3132 __ Cntp(x16, p12, p10.VnB());
3133 __ Cntp(x17, p13, p10.VnB());
3134
3135 // Check other patterns (including some ignored bits).
3136 __ Cntp(x0, p10, p0.VnB());
3137 __ Cntp(x1, p10, p0.VnH());
3138 __ Cntp(x2, p10, p0.VnS());
3139 __ Cntp(x3, p10, p0.VnD());
3140 __ Cntp(x4, p0, p10.VnB());
3141 __ Cntp(x5, p0, p10.VnH());
3142 __ Cntp(x6, p0, p10.VnS());
3143 __ Cntp(x7, p0, p10.VnD());
3144
3145 END();
3146
3147 if (CAN_RUN()) {
3148 RUN();
3149
3150 int vl_b = core.GetSVELaneCount(kBRegSize);
3151 int vl_h = core.GetSVELaneCount(kHRegSize);
3152 int vl_s = core.GetSVELaneCount(kSRegSize);
3153 int vl_d = core.GetSVELaneCount(kDRegSize);
3154
3155 // Check all-active predicates in various combinations.
3156 ASSERT_EQUAL_64(vl_b, x10);
3157 ASSERT_EQUAL_64(vl_h, x11);
3158 ASSERT_EQUAL_64(vl_s, x12);
3159 ASSERT_EQUAL_64(vl_d, x13);
3160
3161 ASSERT_EQUAL_64(vl_b, x14);
3162 ASSERT_EQUAL_64(vl_h, x15);
3163 ASSERT_EQUAL_64(vl_s, x16);
3164 ASSERT_EQUAL_64(vl_d, x17);
3165
3166 // Check that irrelevant bits are properly ignored.
3167 ASSERT_EQUAL_64(7, x0);
3168 ASSERT_EQUAL_64(5, x1);
3169 ASSERT_EQUAL_64(2, x2);
3170 ASSERT_EQUAL_64(1, x3);
3171
3172 ASSERT_EQUAL_64(7, x4);
3173 ASSERT_EQUAL_64(5, x5);
3174 ASSERT_EQUAL_64(2, x6);
3175 ASSERT_EQUAL_64(1, x7);
3176 }
3177}
3178
TatWai Chong13634762019-07-16 16:20:45 -07003179typedef void (MacroAssembler::*IntBinArithFn)(const ZRegister& zd,
3180 const PRegisterM& pg,
3181 const ZRegister& zn,
3182 const ZRegister& zm);
3183
3184template <typename Td, typename Tg, typename Tn>
3185static void IntBinArithHelper(Test* config,
3186 IntBinArithFn macro,
3187 unsigned lane_size_in_bits,
3188 const Tg& pg_inputs,
3189 const Tn& zn_inputs,
3190 const Tn& zm_inputs,
3191 const Td& zd_expected) {
3192 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3193 START();
3194
3195 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3196 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3197 InsrHelper(&masm, src_a, zn_inputs);
3198 InsrHelper(&masm, src_b, zm_inputs);
3199
3200 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3201
3202 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3203 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3204 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3205
3206 // `instr` zd(dst), zd(src_a), zn(src_b)
3207 __ Mov(zd_1, src_a);
3208 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3209
3210 // `instr` zd(dst), zm(src_a), zd(src_b)
3211 // When zd aliases zm, the instruction macro (`Instr`) swaps the operand
3212 // order if the operation is commutative; otherwise it falls back to the
3213 // reversed form of the instruction, such as `subr` or `divr`.
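  // A sketch of the expected expansion (lane-size suffixes omitted):
  //   Add(z1, p0/m, z31, z1)  ->  add  z1, p0/m, z1, z31   (commutative)
  //   Sub(z1, p0/m, z31, z1)  ->  subr z1, p0/m, z1, z31   (reversed form)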
3214 __ Mov(zd_2, src_b);
3215 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3216
3217 // `instr` zd(dst), zm(src_a), zn(src_b)
3218 // The instruction macro (`Instr`) automatically selects between `instr`
3219 // alone and `movprfx` + `instr`, based on whether the zd and zn registers
3220 // are aliased. A generated `movprfx` is predicated, using the same
3221 // governing predicate register, so initialise the destination register
3222 // first to keep the result deterministic.
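  // A sketch of the expected expansion when zd does not alias zn
  // (lane-size suffixes omitted):
  //   Add(z2, p0/m, z31, z27)  ->  movprfx z2, p0/m, z31
  //                                add     z2, p0/m, z2, z27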
3223 __ Mov(zd_3, src_a);
3224 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
3225
3226 END();
3227
3228 if (CAN_RUN()) {
3229 RUN();
3230 ASSERT_EQUAL_SVE(zd_expected, zd_1);
3231
3232 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
3233 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
3234 if (!core.HasSVELane(zd_1, lane)) break;
3235 if (pg_inputs[i] == 1) {
3236 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_1, lane);
3237 } else {
3238 ASSERT_EQUAL_SVE_LANE(zn_inputs[i], zd_1, lane);
3239 }
3240 }
3241
3242 ASSERT_EQUAL_SVE(zd_expected, zd_3);
3243 }
3244}
3245
3246TEST_SVE(sve_binary_arithmetic_predicated_add) {
3247 // clang-format off
3248 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
3249
3250 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
3251
3252 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
3253
3254 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
3255
3256 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
3257 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
3258
3259 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
3260 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
3261
3262 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
3263 0x1010101010101010, 0x8181818181818181,
3264 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
3265 0x0101010101010101, 0x7f7f7f7fffffffff};
3266
3267 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
3268 0x1010101010101010, 0x0000000000000000,
3269 0x8181818181818181, 0x8080808080808080,
3270 0xffffffffffffffff, 0xffffffffffffffff};
3271
3272 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3273 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3274 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3275 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3276
3277 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
3278
3279 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
3280 0x8180, 0x8f8f, 0x0101, 0x7f7e};
3281
3282 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
3283 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
3284
3285 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
3286 0x2020202020202020, 0x8181818181818181,
3287 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
3288 0x0101010101010100, 0x7f7f7f7ffffffffe};
3289
3290 IntBinArithFn fn = &MacroAssembler::Add;
3291 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
3292 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
3293 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
3294 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
3295
3296 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
3297
3298 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
3299 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
3300
3301 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
3302 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
3303
3304 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
3305 0x0000000000000000, 0x8181818181818181,
3306 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
3307 0x0101010101010102, 0x7f7f7f8000000000};
3308
3309 fn = &MacroAssembler::Sub;
3310 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
3311 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
3312 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
3313 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
3314 // clang-format on
3315}
3316
3317TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
3318 // clang-format off
3319 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
3320
3321 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
3322
3323 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
3324 0xff00, 0xba98, 0x5555, 0x4567};
3325
3326 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
3327 0xfe00, 0xabab, 0xcdcd, 0x5678};
3328
3329 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
3330 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
3331
3332 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
3333 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
3334
3335 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
3336 0x5555555555555555, 0x0000000001234567};
3337
3338 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
3339 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3340
3341 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3342 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3343 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3344 int pg_d[] = {1, 0, 1, 1};
3345
3346 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
3347
3348 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
3349 0xff00, 0xba98, 0x5555, 0x5678};
3350
3351 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
3352 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
3353
3354 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3355 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3356
3357 IntBinArithFn fn = &MacroAssembler::Umax;
3358 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
3359 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
3360 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
3361 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
3362
3363 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
3364
3365 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
3366 0xfe00, 0xabab, 0x5555, 0x4567};
3367
3368 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
3369 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
3370
3371 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
3372 0x5555555555555555, 0x0000000001234567};
3373 fn = &MacroAssembler::Umin;
3374 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
3375 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
3376 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
3377 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
3378
3379 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
3380
3381 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
3382 0x0100, 0x0eed, 0x5555, 0x1111};
3383
3384 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
3385 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
3386
3387 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3388 0x7878787878787878, 0x0000000011111111};
3389
3390 fn = &MacroAssembler::Uabd;
3391 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
3392 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
3393 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
3394 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
3395 // clang-format on
3396}
3397
3398TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
3399 // clang-format off
3400 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
3401
3402 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
3403
3404 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
3405 INT16_MIN, INT16_MAX, INT16_MAX, 1};
3406
3407 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
3408 INT16_MAX, INT16_MAX - 1, -1, 0};
3409
3410 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
3411 INT32_MIN, INT32_MAX, INT32_MAX, 1};
3412
3413 int zm_s[] = {-1, 0, -1, -INT32_MAX,
3414 INT32_MAX, INT32_MAX - 1, -1, 0};
3415
3416 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3417 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3418
3419 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
3420 INT64_MAX, INT64_MAX - 1, -1, 0};
3421
3422 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3423 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3424 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3425 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3426
3427 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
3428
3429 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
3430 INT16_MAX, INT16_MAX, INT16_MAX, 1};
3431
3432 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
3433 INT32_MAX, INT32_MAX, INT32_MAX, 1};
3434
3435 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
3436 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3437
3438 IntBinArithFn fn = &MacroAssembler::Smax;
3439 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
3440 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
3441 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
3442 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
3443
3444 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
3445
3446 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
3447 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
3448
3449 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
3450 INT32_MIN, INT32_MAX, -1, 0};
3451
3452 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3453 INT64_MIN, INT64_MAX - 1, -1, 0};
3454
3455 fn = &MacroAssembler::Smin;
3456 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
3457 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
3458 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
3459 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
3460
3461 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
3462
3463 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
3464
3465 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
3466 0xffffffff, 0x7fffffff, 0x80000000, 1};
3467
3468 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
3469 0x8000000000000000, 1, 0x8000000000000000, 1};
3470
3471 fn = &MacroAssembler::Sabd;
3472 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
3473 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
3474 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
3475 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
3476 // clang-format on
3477}
3478
3479TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
3480 // clang-format off
3481 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3482
3483 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3484
3485 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
3486 0x8000, 0xff00, 0x5555, 0xaaaa};
3487
3488 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
3489 0x5555, 0xaaaa, 0x0001, 0x1234};
3490
3491 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3492 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
3493
3494 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3495 0x12345678, 0x22223333, 0x55556666, 0x77778888};
3496
3497 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
3498 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
3499
3500 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
3501 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
3502
3503 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3504 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3505 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3506 int pg_d[] = {1, 1, 0, 1};
3507
3508 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
3509
3510 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
3511 0x8000, 0xff00, 0x5555, 0x9e88};
3512
3513 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
3514 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
3515
3516 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
3517 0xffffffffffffffff, 0x38e38e38e38e38e4};
3518
3519 IntBinArithFn fn = &MacroAssembler::Mul;
3520 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
3521 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
3522 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
3523 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
3524
3525 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
3526
3527 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
3528 0x2aaa, 0xff00, 0x0000, 0x0c22};
3529
3530 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
3531 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
3532
3533 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
3534 0xffffffffffffffff, 0x71c71c71c71c71c6};
3535
3536 fn = &MacroAssembler::Umulh;
3537 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
3538 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
3539 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
3540 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
3541 // clang-format on
3542}
3543
3544TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
3545 // clang-format off
3546 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
3547
3548 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
3549
3550 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
3551
3552 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
3553
3554 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
3555
3556 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
3557
3558 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
3559
3560 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
3561
3562 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3563 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3564 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3565 int pg_d[] = {1, 1, 0, 1};
3566
3567 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
3568
3569 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
3570
3571 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
3572
3573 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
3574
3575 IntBinArithFn fn = &MacroAssembler::Smulh;
3576 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
3577 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
3578 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3579 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3580 // clang-format on
3581}
3582
3583TEST_SVE(sve_binary_arithmetic_predicated_logical) {
3584 // clang-format off
3585 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3586 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3587
3588 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
3589 0x8000, 0xffff, 0x5555, 0xaaaa};
3590 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
3591 0x5555, 0xaaaa, 0x0000, 0x0800};
3592
3593 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
3594 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
3595
3596 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
3597 0x0001200880ff55aa, 0x0022446688aaccee};
3598 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
3599 0x7fcd80ff55aa0008, 0x1133557799bbddff};
3600
3601 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3602 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3603 int pg_s[] = {1, 1, 1, 0};
3604 int pg_d[] = {1, 1, 0, 1};
3605
3606 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
3607
3608 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
3609 0x0000, 0xffff, 0x0000, 0x0800};
3610
3611 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
3612
3613 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
3614 0x0001200880ff55aa, 0x0022446688aaccee};
3615
3616 IntBinArithFn fn = &MacroAssembler::And;
3617 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
3618 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
3619 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
3620 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
3621
3622 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
3623
3624 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
3625 0x8000, 0xffff, 0x5555, 0xa2aa};
3626
3627 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
3628
3629 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
3630 0x0001200880ff55aa, 0x0000000000000000};
3631
3632 fn = &MacroAssembler::Bic;
3633 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
3634 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
3635 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
3636 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
3637
3638 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
3639
3640 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
3641 0xd555, 0xffff, 0x5555, 0xa2aa};
3642
3643 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
3644
3645 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
3646 0x0001200880ff55aa, 0x1111111111111111};
3647
3648 fn = &MacroAssembler::Eor;
3649 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
3650 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
3651 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
3652 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
3653
3654 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
3655
3656 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
3657 0xd555, 0xffff, 0x5555, 0xaaaa};
3658
3659 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
3660
3661 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
3662 0x0001200880ff55aa, 0x1133557799bbddff};
3663
3664 fn = &MacroAssembler::Orr;
3665 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
3666 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
3667 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
3668 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
3669 // clang-format on
3670}
3671
3672TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
3673 // clang-format off
3674 int zn_s[] = {0, 1, -1, 2468,
3675 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
3676 -11111111, 87654321, 0, 0};
3677
3678 int zm_s[] = {1, -1, 1, 1234,
3679 -1, INT32_MIN, 1, -1,
3680 22222222, 80000000, -1, 0};
3681
3682 int64_t zn_d[] = {0, 1, -1, 2468,
3683 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
3684 -11111111, 87654321, 0, 0};
3685
3686 int64_t zm_d[] = {1, -1, 1, 1234,
3687 -1, INT64_MIN, 1, -1,
3688 22222222, 80000000, -1, 0};
3689
3690 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
3691 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
3692
3693 int exp_s[] = {0, 1, -1, 2,
3694 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
3695 0, 1, 0, 0};
3696
3697 int64_t exp_d[] = {0, -1, -1, 2,
3698 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
3699 0, 1, 0, 0};
3700
3701 IntBinArithFn fn = &MacroAssembler::Sdiv;
3702 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3703 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3704 // clang-format on
3705}
3706
3707TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
3708 // clang-format off
3709 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
3710 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
3711
3712 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
3713 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
3714
3715 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
3716 0xffffffffffffffff, 0x8000000000000000,
3717 0xffffffffffffffff, 0x8000000000000000,
3718 0xffffffffffffffff, 0xf0000000f0000000};
3719
3720 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
3721 0x8000000000000000, 0x0000000000000002,
3722 0x8888888888888888, 0x0000000000000001,
3723 0x0000000080000000, 0x00000000f0000000};
3724
3725 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
3726 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
3727
3728 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
3729 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
3730
3731 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
3732 0x0000000000000001, 0x4000000000000000,
3733 0x0000000000000001, 0x8000000000000000,
3734 0xffffffffffffffff, 0x0000000100000001};
3735
3736 IntBinArithFn fn = &MacroAssembler::Udiv;
3737 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3738 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3739 // clang-format on
3740}
3741
TatWai Chongfe536042019-10-23 16:34:11 -07003742typedef void (MacroAssembler::*ArithmeticFn)(const ZRegister& zd,
3743 const ZRegister& zn,
3744 const ZRegister& zm);
TatWai Chong845246b2019-08-08 00:01:58 -07003745
3746template <typename T>
3747static void IntArithHelper(Test* config,
TatWai Chongfe536042019-10-23 16:34:11 -07003748 ArithmeticFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07003749 unsigned lane_size_in_bits,
3750 const T& zn_inputs,
3751 const T& zm_inputs,
3752 const T& zd_expected) {
3753 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3754 START();
3755
3756 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
3757 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
3758 InsrHelper(&masm, zn, zn_inputs);
3759 InsrHelper(&masm, zm, zm_inputs);
3760
3761 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
3762 (masm.*macro)(zd, zn, zm);
3763
3764 END();
3765
3766 if (CAN_RUN()) {
3767 RUN();
3768 ASSERT_EQUAL_SVE(zd_expected, zd);
3769 }
3770}
3771
3772TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
3773 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01003774 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
3775 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
3776 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
3777 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07003778 0x1000000010001010, 0xf0000000f000f0f0};
3779
TatWai Chongfe536042019-10-23 16:34:11 -07003780 ArithmeticFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07003781
3782 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
3783 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
3784 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
3785 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
3786 0x2000000020002020, 0xe0000001e001e1e0};
3787
TatWai Chong6995bfd2019-09-26 10:48:05 +01003788 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
3789 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
3790 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
3791 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003792
3793 fn = &MacroAssembler::Sqadd;
3794
3795 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
3796 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
3797 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
3798 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3799 0x2000000020002020, 0xe0000001e001e1e0};
3800
TatWai Chong6995bfd2019-09-26 10:48:05 +01003801 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
3802 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
3803 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
3804 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003805
3806 fn = &MacroAssembler::Uqadd;
3807
3808 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
3809 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
3810 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
3811 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
3812 0x2000000020002020, 0xffffffffffffffff};
3813
TatWai Chong6995bfd2019-09-26 10:48:05 +01003814 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
3815 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
3816 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
3817 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003818 // clang-format on
3819}
3820
3821TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
3822 // clang-format off
3823
3824 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
3825 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
3826
3827 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
3828 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
3829
3830 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
3831 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
3832
3833 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
3834 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
3835 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
3836 0xf0000000f000f0f0, 0x5555555555555555};
3837
TatWai Chongfe536042019-10-23 16:34:11 -07003838 ArithmeticFn fn = &MacroAssembler::Sub;
TatWai Chong845246b2019-08-08 00:01:58 -07003839
3840 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
3841 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
3842 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
3843 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
3844 0x8eeeeeed8eed8d8e, 0x5555555555555555};
3845
3846 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
3847 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
3848 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
3849 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
3850
3851 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
3852 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
3853 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
3854 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
3855 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
3856
3857 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
3858 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
3859 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
3860 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
3861
3862 fn = &MacroAssembler::Sqsub;
3863
3864 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
3865 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
3866 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
3867 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3868 0x7fffffffffffffff, 0x8000000000000000};
3869
3870 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
3871 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
3872 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
3873 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
3874
3875 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
3876 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
3877 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
3878 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
3879 0x8000000000000000, 0x7fffffffffffffff};
3880
3881 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
3882 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
3883 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
3884 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
3885
3886 fn = &MacroAssembler::Uqsub;
3887
3888 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
3889 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
3890 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
3891 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
3892 0x0000000000000000, 0x5555555555555555};
3893
3894 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
3895 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
3896 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
3897 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
3898
3899 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
3900 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
3901 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
3902 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
3903 0x7111111271127272, 0x0000000000000000};
3904
3905 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
3906 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
3907 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
3908 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
3909 // clang-format on
3910}
3911
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01003912TEST_SVE(sve_rdvl) {
3913 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3914 START();
3915
3916 // Encodable multipliers.
3917 __ Rdvl(x0, 0);
3918 __ Rdvl(x1, 1);
3919 __ Rdvl(x2, 2);
3920 __ Rdvl(x3, 31);
3921 __ Rdvl(x4, -1);
3922 __ Rdvl(x5, -2);
3923 __ Rdvl(x6, -32);
3924
3925 // For unencodable multipliers, the MacroAssembler uses a sequence of
3926 // instructions.
3927 __ Rdvl(x10, 32);
3928 __ Rdvl(x11, -33);
3929 __ Rdvl(x12, 42);
3930 __ Rdvl(x13, -42);
3931
3932 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3933 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3934 // occurs in the macro.
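  // For example, 0x007fffffffffffff * 256 = 0x7fffffffffffff00, which still
  // fits in a signed 64-bit result.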
3935 __ Rdvl(x14, 0x007fffffffffffff);
3936 __ Rdvl(x15, -0x0080000000000000);
3937
3938 END();
3939
3940 if (CAN_RUN()) {
3941 RUN();
3942
3943 uint64_t vl = config->sve_vl_in_bytes();
3944
3945 ASSERT_EQUAL_64(vl * 0, x0);
3946 ASSERT_EQUAL_64(vl * 1, x1);
3947 ASSERT_EQUAL_64(vl * 2, x2);
3948 ASSERT_EQUAL_64(vl * 31, x3);
3949 ASSERT_EQUAL_64(vl * -1, x4);
3950 ASSERT_EQUAL_64(vl * -2, x5);
3951 ASSERT_EQUAL_64(vl * -32, x6);
3952
3953 ASSERT_EQUAL_64(vl * 32, x10);
3954 ASSERT_EQUAL_64(vl * -33, x11);
3955 ASSERT_EQUAL_64(vl * 42, x12);
3956 ASSERT_EQUAL_64(vl * -42, x13);
3957
3958 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
3959 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
3960 }
3961}
3962
3963TEST_SVE(sve_rdpl) {
3964 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3965 START();
3966
3967 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
3968 // Addpl(xd, xzr, ...).
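  // In other words, `Rdpl(x0, 8)` is expected to behave like
  // `Addpl(x0, xzr, 8)`, producing 8 * PL.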
3969
3970 // Encodable multipliers (as `addvl`).
3971 __ Rdpl(x0, 0);
3972 __ Rdpl(x1, 8);
3973 __ Rdpl(x2, 248);
3974 __ Rdpl(x3, -8);
3975 __ Rdpl(x4, -256);
3976
3977 // Encodable multipliers (as `movz` + `addpl`).
3978 __ Rdpl(x7, 31);
3979 __ Rdpl(x8, -32);
3980
3981 // For unencodable multipliers, the MacroAssembler uses a sequence of
3982 // instructions.
3983 __ Rdpl(x10, 42);
3984 __ Rdpl(x11, -42);
3985
3986 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
3987 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
3988 // occurs in the macro.
3989 __ Rdpl(x12, 0x007fffffffffffff);
3990 __ Rdpl(x13, -0x0080000000000000);
3991
3992 END();
3993
3994 if (CAN_RUN()) {
3995 RUN();
3996
3997 uint64_t vl = config->sve_vl_in_bytes();
3998 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
3999 uint64_t pl = vl / kZRegBitsPerPRegBit;
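    // Each predicate bit governs one byte of a Z register, so PL is expected
    // to be VL / 8 (for example, a 2048-bit VL gives a 256-bit PL).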
4000
4001 ASSERT_EQUAL_64(pl * 0, x0);
4002 ASSERT_EQUAL_64(pl * 8, x1);
4003 ASSERT_EQUAL_64(pl * 248, x2);
4004 ASSERT_EQUAL_64(pl * -8, x3);
4005 ASSERT_EQUAL_64(pl * -256, x4);
4006
4007 ASSERT_EQUAL_64(pl * 31, x7);
4008 ASSERT_EQUAL_64(pl * -32, x8);
4009
4010 ASSERT_EQUAL_64(pl * 42, x10);
4011 ASSERT_EQUAL_64(pl * -42, x11);
4012
4013 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4014 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4015 }
4016}
4017
4018TEST_SVE(sve_addvl) {
4019 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4020 START();
4021
4022 uint64_t base = 0x1234567800000000;
4023 __ Mov(x30, base);
4024
4025 // Encodable multipliers.
4026 __ Addvl(x0, x30, 0);
4027 __ Addvl(x1, x30, 1);
4028 __ Addvl(x2, x30, 31);
4029 __ Addvl(x3, x30, -1);
4030 __ Addvl(x4, x30, -32);
4031
4032 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4033 __ Addvl(x5, x30, 32);
4034 __ Addvl(x6, x30, -33);
4035
4036 // Test the limits of the multiplier supported by the `Rdvl` macro.
4037 __ Addvl(x7, x30, 0x007fffffffffffff);
4038 __ Addvl(x8, x30, -0x0080000000000000);
4039
4040 // Check that xzr behaves correctly.
4041 __ Addvl(x9, xzr, 8);
4042 __ Addvl(x10, xzr, 42);
4043
4044 // Check that sp behaves correctly with encodable and unencodable multipliers.
4045 __ Addvl(sp, sp, -5);
4046 __ Addvl(sp, sp, -37);
4047 __ Addvl(x11, sp, -2);
4048 __ Addvl(sp, x11, 2);
4049 __ Addvl(x12, sp, -42);
4050
4051 // Restore the value of sp.
4052 __ Addvl(sp, x11, 39);
4053 __ Addvl(sp, sp, 5);
4054
4055 // Adjust x11 and x12 to make the test sp-agnostic.
4056 __ Sub(x11, sp, x11);
4057 __ Sub(x12, sp, x12);
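  // At this point sp has been restored, so x11 and x12 hold offsets below the
  // original sp: x11 was taken (5 + 37 + 2) = 44 VLs down, and x12 a further
  // 42 VLs below the 42-VL point, i.e. 84 VLs down, matching the assertions
  // below.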
4058
4059 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4060 __ Mov(x20, x30);
4061 __ Mov(x21, x30);
4062 __ Mov(x22, x30);
4063 __ Addvl(x20, x20, 4);
4064 __ Addvl(x21, x21, 42);
4065 __ Addvl(x22, x22, -0x0080000000000000);
4066
4067 END();
4068
4069 if (CAN_RUN()) {
4070 RUN();
4071
4072 uint64_t vl = config->sve_vl_in_bytes();
4073
4074 ASSERT_EQUAL_64(base + (vl * 0), x0);
4075 ASSERT_EQUAL_64(base + (vl * 1), x1);
4076 ASSERT_EQUAL_64(base + (vl * 31), x2);
4077 ASSERT_EQUAL_64(base + (vl * -1), x3);
4078 ASSERT_EQUAL_64(base + (vl * -32), x4);
4079
4080 ASSERT_EQUAL_64(base + (vl * 32), x5);
4081 ASSERT_EQUAL_64(base + (vl * -33), x6);
4082
4083 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4084 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4085
4086 ASSERT_EQUAL_64(vl * 8, x9);
4087 ASSERT_EQUAL_64(vl * 42, x10);
4088
4089 ASSERT_EQUAL_64(vl * 44, x11);
4090 ASSERT_EQUAL_64(vl * 84, x12);
4091
4092 ASSERT_EQUAL_64(base + (vl * 4), x20);
4093 ASSERT_EQUAL_64(base + (vl * 42), x21);
4094 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4095
4096 ASSERT_EQUAL_64(base, x30);
4097 }
4098}
4099
4100TEST_SVE(sve_addpl) {
4101 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4102 START();
4103
4104 uint64_t base = 0x1234567800000000;
4105 __ Mov(x30, base);
4106
4107 // Encodable multipliers.
4108 __ Addpl(x0, x30, 0);
4109 __ Addpl(x1, x30, 1);
4110 __ Addpl(x2, x30, 31);
4111 __ Addpl(x3, x30, -1);
4112 __ Addpl(x4, x30, -32);
4113
4114 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4115 // it falls back to `Rdvl` and `Add`.
4116 __ Addpl(x5, x30, 32);
4117 __ Addpl(x6, x30, -33);
4118
4119 // Test the limits of the multiplier supported by the `Rdvl` macro.
4120 __ Addpl(x7, x30, 0x007fffffffffffff);
4121 __ Addpl(x8, x30, -0x0080000000000000);
4122
4123 // Check that xzr behaves correctly.
4124 __ Addpl(x9, xzr, 8);
4125 __ Addpl(x10, xzr, 42);
4126
4127 // Check that sp behaves correctly with encodable and unencodable multipliers.
4128 __ Addpl(sp, sp, -5);
4129 __ Addpl(sp, sp, -37);
4130 __ Addpl(x11, sp, -2);
4131 __ Addpl(sp, x11, 2);
4132 __ Addpl(x12, sp, -42);
4133
4134 // Restore the value of sp.
4135 __ Addpl(sp, x11, 39);
4136 __ Addpl(sp, sp, 5);
4137
4138 // Adjust x11 and x12 to make the test sp-agnostic.
4139 __ Sub(x11, sp, x11);
4140 __ Sub(x12, sp, x12);
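  // As in the `Addvl` test above, x11 and x12 end up 44 and 84 units (PLs in
  // this case) below the original sp.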
4141
4142 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4143 __ Mov(x20, x30);
4144 __ Mov(x21, x30);
4145 __ Mov(x22, x30);
4146 __ Addpl(x20, x20, 4);
4147 __ Addpl(x21, x21, 42);
4148 __ Addpl(x22, x22, -0x0080000000000000);
4149
4150 END();
4151
4152 if (CAN_RUN()) {
4153 RUN();
4154
4155 uint64_t vl = config->sve_vl_in_bytes();
4156 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4157 uint64_t pl = vl / kZRegBitsPerPRegBit;
4158
4159 ASSERT_EQUAL_64(base + (pl * 0), x0);
4160 ASSERT_EQUAL_64(base + (pl * 1), x1);
4161 ASSERT_EQUAL_64(base + (pl * 31), x2);
4162 ASSERT_EQUAL_64(base + (pl * -1), x3);
4163 ASSERT_EQUAL_64(base + (pl * -32), x4);
4164
4165 ASSERT_EQUAL_64(base + (pl * 32), x5);
4166 ASSERT_EQUAL_64(base + (pl * -33), x6);
4167
4168 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4169 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4170
4171 ASSERT_EQUAL_64(pl * 8, x9);
4172 ASSERT_EQUAL_64(pl * 42, x10);
4173
4174 ASSERT_EQUAL_64(pl * 44, x11);
4175 ASSERT_EQUAL_64(pl * 84, x12);
4176
4177 ASSERT_EQUAL_64(base + (pl * 4), x20);
4178 ASSERT_EQUAL_64(base + (pl * 42), x21);
4179 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4180
4181 ASSERT_EQUAL_64(base, x30);
4182 }
4183}
4184
Jacob Bramley1314c462019-08-08 10:54:16 +01004185TEST_SVE(sve_adr_x) {
4186 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4187 START();
4188
4189 uint64_t base = 0x1234567800000000;
4190 __ Mov(x28, base);
4191 __ Mov(x29, 48);
4192 __ Mov(x30, -48);
4193
4194 // Simple scalar (or equivalent) cases.
4195
4196 __ Adr(x0, SVEMemOperand(x28));
4197 __ Adr(x1, SVEMemOperand(x28, 0));
4198 __ Adr(x2, SVEMemOperand(x28, 0, SVE_MUL_VL).ForZRegAccess());
4199 __ Adr(x3, SVEMemOperand(x28, 0, SVE_MUL_VL).ForPRegAccess());
4200 __ Adr(x4, SVEMemOperand(x28, xzr));
4201 __ Adr(x5, SVEMemOperand(x28, xzr, LSL, 42));
4202
4203 // scalar-plus-immediate
4204
4205 // Unscaled immediates, handled with `Add`.
4206 __ Adr(x6, SVEMemOperand(x28, 42));
4207 __ Adr(x7, SVEMemOperand(x28, -42));
4208 // Scaled immediates, handled with `Addvl` or `Addpl`.
4209 __ Adr(x8, SVEMemOperand(x28, 31, SVE_MUL_VL).ForZRegAccess());
4210 __ Adr(x9, SVEMemOperand(x28, -32, SVE_MUL_VL).ForZRegAccess());
4211 __ Adr(x10, SVEMemOperand(x28, 31, SVE_MUL_VL).ForPRegAccess());
4212 __ Adr(x11, SVEMemOperand(x28, -32, SVE_MUL_VL).ForPRegAccess());
4213 // Out of `addvl` or `addpl` range.
4214 __ Adr(x12, SVEMemOperand(x28, 42, SVE_MUL_VL).ForZRegAccess());
4215 __ Adr(x13, SVEMemOperand(x28, -42, SVE_MUL_VL).ForZRegAccess());
4216 __ Adr(x14, SVEMemOperand(x28, 42, SVE_MUL_VL).ForPRegAccess());
4217 __ Adr(x15, SVEMemOperand(x28, -42, SVE_MUL_VL).ForPRegAccess());
4218
4219 // scalar-plus-scalar
4220
4221 __ Adr(x18, SVEMemOperand(x28, x29));
4222 __ Adr(x19, SVEMemOperand(x28, x30));
4223 __ Adr(x20, SVEMemOperand(x28, x29, LSL, 8));
4224 __ Adr(x21, SVEMemOperand(x28, x30, LSL, 8));
4225
4226 // In-place updates, to stress scratch register allocation.
4227
4228 __ Mov(x22, 0xabcd000000000000);
4229 __ Mov(x23, 0xabcd101100000000);
4230 __ Mov(x24, 0xabcd202200000000);
4231 __ Mov(x25, 0xabcd303300000000);
4232 __ Mov(x26, 0xabcd404400000000);
4233 __ Mov(x27, 0xabcd505500000000);
4234
4235 __ Adr(x22, SVEMemOperand(x22));
4236 __ Adr(x23, SVEMemOperand(x23, 0x42));
4237 __ Adr(x24, SVEMemOperand(x24, 3, SVE_MUL_VL).ForZRegAccess());
4238 __ Adr(x25, SVEMemOperand(x25, 0x42, SVE_MUL_VL).ForPRegAccess());
4239 __ Adr(x26, SVEMemOperand(x26, x29));
4240 __ Adr(x27, SVEMemOperand(x27, x30, LSL, 4));
4241
4242 END();
4243
4244 if (CAN_RUN()) {
4245 RUN();
4246
4247 uint64_t vl = config->sve_vl_in_bytes();
4248 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4249 uint64_t pl = vl / kZRegBitsPerPRegBit;
4250
4251 // Simple scalar (or equivalent) cases.
4252 ASSERT_EQUAL_64(base, x0);
4253 ASSERT_EQUAL_64(base, x1);
4254 ASSERT_EQUAL_64(base, x2);
4255 ASSERT_EQUAL_64(base, x3);
4256 ASSERT_EQUAL_64(base, x4);
4257 ASSERT_EQUAL_64(base, x5);
4258
4259 // scalar-plus-immediate
4260 ASSERT_EQUAL_64(base + 42, x6);
4261 ASSERT_EQUAL_64(base - 42, x7);
4262 ASSERT_EQUAL_64(base + (31 * vl), x8);
4263 ASSERT_EQUAL_64(base - (32 * vl), x9);
4264 ASSERT_EQUAL_64(base + (31 * pl), x10);
4265 ASSERT_EQUAL_64(base - (32 * pl), x11);
4266 ASSERT_EQUAL_64(base + (42 * vl), x12);
4267 ASSERT_EQUAL_64(base - (42 * vl), x13);
4268 ASSERT_EQUAL_64(base + (42 * pl), x14);
4269 ASSERT_EQUAL_64(base - (42 * pl), x15);
4270
4271 // scalar-plus-scalar
4272 ASSERT_EQUAL_64(base + 48, x18);
4273 ASSERT_EQUAL_64(base - 48, x19);
4274 ASSERT_EQUAL_64(base + (48 << 8), x20);
4275 ASSERT_EQUAL_64(base - (48 << 8), x21);
4276
4277 // In-place updates.
4278 ASSERT_EQUAL_64(0xabcd000000000000, x22);
4279 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x23);
4280 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x24);
4281 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x25);
4282 ASSERT_EQUAL_64(0xabcd404400000000 + 48, x26);
4283 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x27);
4284
4285 // Check that the inputs were unmodified.
4286 ASSERT_EQUAL_64(base, x28);
4287 ASSERT_EQUAL_64(48, x29);
4288 ASSERT_EQUAL_64(-48, x30);
4289 }
4290}
4291
TatWai Chong4f28df72019-08-14 17:50:30 -07004292TEST_SVE(sve_permute_vector_unpredicated) {
4293 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
4294 START();
4295
Jacob Bramleye4983d42019-10-08 10:56:15 +01004296 // Initialise registers with known values first.
4297 __ Dup(z1.VnB(), 0x11);
4298 __ Dup(z2.VnB(), 0x22);
4299 __ Dup(z3.VnB(), 0x33);
4300 __ Dup(z4.VnB(), 0x44);
4301
TatWai Chong4f28df72019-08-14 17:50:30 -07004302 __ Mov(x0, 0x0123456789abcdef);
4303 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
4304 __ Insr(z1.VnS(), w0);
4305 __ Insr(z2.VnD(), x0);
4306 __ Insr(z3.VnH(), h0);
4307 __ Insr(z4.VnD(), d0);
4308
4309 uint64_t inputs[] = {0xfedcba9876543210,
4310 0x0123456789abcdef,
4311 0x8f8e8d8c8b8a8988,
4312 0x8786858483828180};
4313
4314 // Initialise the register with a distinguishable fill value first.
4315 __ Dup(z9.VnB(), 0xff);
4316 InsrHelper(&masm, z9.VnD(), inputs);
4317
4318 __ Rev(z5.VnB(), z9.VnB());
4319 __ Rev(z6.VnH(), z9.VnH());
4320 __ Rev(z7.VnS(), z9.VnS());
4321 __ Rev(z8.VnD(), z9.VnD());
4322
4323 int index[7] = {22, 7, 7, 3, 1, 1, 63};
4324 // Broadcast a lane from within the input array.
4325 __ Dup(z10.VnB(), z9.VnB(), index[0]);
4326 __ Dup(z11.VnH(), z9.VnH(), index[1]);
4327 __ Dup(z12.VnS(), z9.VnS(), index[2]);
4328 __ Dup(z13.VnD(), z9.VnD(), index[3]);
4329 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
4330 // Test dst == src
4331 __ Mov(z15, z9);
4332 __ Dup(z15.VnS(), z15.VnS(), index[5]);
4333 // Select a lane beyond the input array.
4334 __ Dup(z16.VnB(), z9.VnB(), index[6]);
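  // Whether these indices are in range depends on the vector length: an
  // out-of-range index is expected to produce zero, while index 63 (when in
  // range) selects the 0xff fill value, as the checks below reflect.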
4335
4336 END();
4337
4338 if (CAN_RUN()) {
4339 RUN();
4340
4341 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01004342 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
4343 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
4344 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
4345 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07004346 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
4347 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
4348 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
4349 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
4350
4351 // Rev
4352 int lane_count = core.GetSVELaneCount(kBRegSize);
4353 for (int i = 0; i < lane_count; i++) {
4354 uint64_t expected =
4355 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
4356 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
4357 ASSERT_EQUAL_64(expected, input);
4358 }
4359
4360 lane_count = core.GetSVELaneCount(kHRegSize);
4361 for (int i = 0; i < lane_count; i++) {
4362 uint64_t expected =
4363 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
4364 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
4365 ASSERT_EQUAL_64(expected, input);
4366 }
4367
4368 lane_count = core.GetSVELaneCount(kSRegSize);
4369 for (int i = 0; i < lane_count; i++) {
4370 uint64_t expected =
4371 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
4372 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
4373 ASSERT_EQUAL_64(expected, input);
4374 }
4375
4376 lane_count = core.GetSVELaneCount(kDRegSize);
4377 for (int i = 0; i < lane_count; i++) {
4378 uint64_t expected =
4379 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
4380 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
4381 ASSERT_EQUAL_64(expected, input);
4382 }
4383
4384 // Dup
4385 unsigned vl = config->sve_vl_in_bits();
4386 lane_count = core.GetSVELaneCount(kBRegSize);
4387 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
4388 for (int i = 0; i < lane_count; i++) {
4389 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
4390 }
4391
4392 lane_count = core.GetSVELaneCount(kHRegSize);
4393 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
4394 for (int i = 0; i < lane_count; i++) {
4395 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
4396 }
4397
4398 lane_count = core.GetSVELaneCount(kSRegSize);
4399 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
4400 for (int i = 0; i < lane_count; i++) {
4401 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
4402 }
4403
4404 lane_count = core.GetSVELaneCount(kDRegSize);
4405 uint64_t expected_z13 =
4406 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
4407 for (int i = 0; i < lane_count; i++) {
4408 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
4409 }
4410
4411 lane_count = core.GetSVELaneCount(kDRegSize);
4412 uint64_t expected_z14_lo = 0;
4413 uint64_t expected_z14_hi = 0;
4414 if (vl > (index[4] * kQRegSize)) {
4415 expected_z14_lo = 0x0123456789abcdef;
4416 expected_z14_hi = 0xfedcba9876543210;
4417 }
4418 for (int i = 0; i < lane_count; i += 2) {
4419 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
4420 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
4421 }
4422
4423 lane_count = core.GetSVELaneCount(kSRegSize);
4424 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
4425 for (int i = 0; i < lane_count; i++) {
4426 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
4427 }
4428
4429 lane_count = core.GetSVELaneCount(kBRegSize);
4430 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
4431 for (int i = 0; i < lane_count; i++) {
4432 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
4433 }
4434 }
4435}
4436
4437TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
4438 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4439 START();
4440
4441 uint64_t z9_inputs[] = {0xfedcba9876543210,
4442 0x0123456789abcdef,
4443 0x8f8e8d8c8b8a8988,
4444 0x8786858483828180};
4445 InsrHelper(&masm, z9.VnD(), z9_inputs);
4446
4447 __ Sunpkhi(z10.VnH(), z9.VnB());
4448 __ Sunpkhi(z11.VnS(), z9.VnH());
4449 __ Sunpkhi(z12.VnD(), z9.VnS());
4450
4451 __ Sunpklo(z13.VnH(), z9.VnB());
4452 __ Sunpklo(z14.VnS(), z9.VnH());
4453 __ Sunpklo(z15.VnD(), z9.VnS());
4454
4455 __ Uunpkhi(z16.VnH(), z9.VnB());
4456 __ Uunpkhi(z17.VnS(), z9.VnH());
4457 __ Uunpkhi(z18.VnD(), z9.VnS());
4458
4459 __ Uunpklo(z19.VnH(), z9.VnB());
4460 __ Uunpklo(z20.VnS(), z9.VnH());
4461 __ Uunpklo(z21.VnD(), z9.VnS());
4462
4463 END();
4464
4465 if (CAN_RUN()) {
4466 RUN();
4467
4468 // Sunpkhi
4469 int lane_count = core.GetSVELaneCount(kHRegSize);
4470 for (int i = lane_count - 1; i >= 0; i--) {
4471 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
4472 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4473 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4474 ASSERT_EQUAL_64(expected, input);
4475 }
4476
4477 lane_count = core.GetSVELaneCount(kSRegSize);
4478 for (int i = lane_count - 1; i >= 0; i--) {
4479 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
4480 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4481 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4482 ASSERT_EQUAL_64(expected, input);
4483 }
4484
4485 lane_count = core.GetSVELaneCount(kDRegSize);
4486 for (int i = lane_count - 1; i >= 0; i--) {
4487 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
4488 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4489 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4490 ASSERT_EQUAL_64(expected, input);
4491 }
4492
4493 // Sunpklo
4494 lane_count = core.GetSVELaneCount(kHRegSize);
4495 for (int i = lane_count - 1; i >= 0; i--) {
4496 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
4497 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4498 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4499 ASSERT_EQUAL_64(expected, input);
4500 }
4501
4502 lane_count = core.GetSVELaneCount(kSRegSize);
4503 for (int i = lane_count - 1; i >= 0; i--) {
4504 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
4505 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4506 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4507 ASSERT_EQUAL_64(expected, input);
4508 }
4509
4510 lane_count = core.GetSVELaneCount(kDRegSize);
4511 for (int i = lane_count - 1; i >= 0; i--) {
4512 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
4513 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4514 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4515 ASSERT_EQUAL_64(expected, input);
4516 }
4517
4518 // Uunpkhi
4519 lane_count = core.GetSVELaneCount(kHRegSize);
4520 for (int i = lane_count - 1; i >= 0; i--) {
4521 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
4522 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4523 ASSERT_EQUAL_64(expected, input);
4524 }
4525
4526 lane_count = core.GetSVELaneCount(kSRegSize);
4527 for (int i = lane_count - 1; i >= 0; i--) {
4528 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
4529 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4530 ASSERT_EQUAL_64(expected, input);
4531 }
4532
4533 lane_count = core.GetSVELaneCount(kDRegSize);
4534 for (int i = lane_count - 1; i >= 0; i--) {
4535 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
4536 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4537 ASSERT_EQUAL_64(expected, input);
4538 }
4539
4540 // Uunpklo
4541 lane_count = core.GetSVELaneCount(kHRegSize);
4542 for (int i = lane_count - 1; i >= 0; i--) {
4543 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
4544 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4545 ASSERT_EQUAL_64(expected, input);
4546 }
4547
4548 lane_count = core.GetSVELaneCount(kSRegSize);
4549 for (int i = lane_count - 1; i >= 0; i--) {
4550 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
4551 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4552 ASSERT_EQUAL_64(expected, input);
4553 }
4554
4555 lane_count = core.GetSVELaneCount(kDRegSize);
4556 for (int i = lane_count - 1; i >= 0; i--) {
4557 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
4558 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4559 ASSERT_EQUAL_64(expected, input);
4560 }
4561 }
4562}
4563
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004564TEST_SVE(sve_cnot_not) {
4565 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4566 START();
4567
4568 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
4569
4570 // For simplicity, we re-use the same pg for various lane sizes.
4571 // For D lanes: 1, 1, 0
4572 // For S lanes: 1, 1, 1, 0, 0
4573 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4574 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4575 Initialise(&masm, p0.VnB(), pg_in);
4576 PRegisterM pg = p0.Merging();
4577
4578 // These are merging operations, so we have to initialise the result register.
4579 // We use a mixture of constructive and destructive operations.
4580
4581 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004582 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004583 __ Mov(z30, z31);
4584
4585 // For constructive operations, use a different initial result value.
4586 __ Index(z29.VnB(), 0, -1);
4587
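  // `Cnot` writes 1 to each active lane whose input is zero and 0 otherwise,
  // while `Not` performs a bitwise inversion. With merging predication,
  // inactive lanes keep whatever is already in the destination; only the
  // lowest-numbered predicate bit of each element governs that element.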
4588 __ Mov(z0, z31);
4589 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
4590 __ Mov(z1, z29);
4591 __ Cnot(z1.VnH(), pg, z31.VnH());
4592 __ Mov(z2, z31);
4593 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
4594 __ Mov(z3, z29);
4595 __ Cnot(z3.VnD(), pg, z31.VnD());
4596
4597 __ Mov(z4, z29);
4598 __ Not(z4.VnB(), pg, z31.VnB());
4599 __ Mov(z5, z31);
4600 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
4601 __ Mov(z6, z29);
4602 __ Not(z6.VnS(), pg, z31.VnS());
4603 __ Mov(z7, z31);
4604 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
4605
4606 END();
4607
4608 if (CAN_RUN()) {
4609 RUN();
4610
4611 // Check that constructive operations preserve their inputs.
4612 ASSERT_EQUAL_SVE(z30, z31);
4613
4614 // clang-format off
4615
4616 // Cnot (B) destructive
4617 uint64_t expected_z0[] =
4618 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4619 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
4620 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4621
4622 // Cnot (H)
4623 uint64_t expected_z1[] =
4624 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4625 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
4626 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4627
4628 // Cnot (S) destructive
4629 uint64_t expected_z2[] =
4630 // pg: 0 1 1 1 0 0
4631 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
4632 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4633
4634 // Cnot (D)
4635 uint64_t expected_z3[] =
4636 // pg: 1 1 0
4637 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
4638 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4639
4640 // Not (B)
4641 uint64_t expected_z4[] =
4642 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4643 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
4644 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4645
4646 // Not (H) destructive
4647 uint64_t expected_z5[] =
4648 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4649 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
4650 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4651
4652 // Not (S)
4653 uint64_t expected_z6[] =
4654 // pg: 0 1 1 1 0 0
4655 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
4656 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
4657
4658 // Not (D) destructive
4659 uint64_t expected_z7[] =
4660 // pg: 1 1 0
4661 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
4662 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
4663
4664 // clang-format on
4665 }
4666}
4667
4668TEST_SVE(sve_fabs_fneg) {
4669 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4670 START();
4671
4672 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
4673 // NaNs, but fabs and fneg do not.
4674 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
4675 0xfff00000ff80fc01, // Signalling NaNs.
4676 0x123456789abcdef0};
4677
4678 // For simplicity, we re-use the same pg for various lane sizes.
4679 // For D lanes: 1, 1, 0
4680 // For S lanes: 1, 1, 1, 0, 0
4681 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4682 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4683 Initialise(&masm, p0.VnB(), pg_in);
4684 PRegisterM pg = p0.Merging();
4685
4686 // These are merging operations, so we have to initialise the result register.
4687 // We use a mixture of constructive and destructive operations.
4688
4689 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004690 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004691 __ Mov(z30, z31);
4692
4693 // For constructive operations, use a different initial result value.
4694 __ Index(z29.VnB(), 0, -1);
4695
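  // Fabs and Fneg only clear or invert the sign bit, so the signalling NaNs
  // above pass through with their payloads (and signalling state) intact.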
4696 __ Mov(z0, z29);
4697 __ Fabs(z0.VnH(), pg, z31.VnH());
4698 __ Mov(z1, z31);
4699 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
4700 __ Mov(z2, z29);
4701 __ Fabs(z2.VnD(), pg, z31.VnD());
4702
4703 __ Mov(z3, z31);
4704 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
4705 __ Mov(z4, z29);
4706 __ Fneg(z4.VnS(), pg, z31.VnS());
4707 __ Mov(z5, z31);
4708 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
4709
4710 END();
4711
4712 if (CAN_RUN()) {
4713 RUN();
4714
4715 // Check that constructive operations preserve their inputs.
4716 ASSERT_EQUAL_SVE(z30, z31);
4717
4718 // clang-format off
4719
4720 // Fabs (H)
4721 uint64_t expected_z0[] =
4722 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4723 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
4724 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4725
4726 // Fabs (S) destructive
4727 uint64_t expected_z1[] =
4728 // pg: 0 1 1 1 0 0
4729 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
4730 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4731
4732 // Fabs (D)
4733 uint64_t expected_z2[] =
4734 // pg: 1 1 0
4735 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
4736 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4737
4738 // Fneg (H) destructive
4739 uint64_t expected_z3[] =
4740 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4741 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
4742 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4743
4744 // Fneg (S)
4745 uint64_t expected_z4[] =
4746 // pg: 0 1 1 1 0 0
4747 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
4748 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4749
4750 // Fneg (D) destructive
4751 uint64_t expected_z5[] =
4752 // pg: 1 1 0
4753 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
4754 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4755
4756 // clang-format on
4757 }
4758}
4759
4760TEST_SVE(sve_cls_clz_cnt) {
4761 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4762 START();
4763
4764 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4765
4766 // For simplicity, we re-use the same pg for various lane sizes.
4767 // For D lanes: 1, 1, 0
4768 // For S lanes: 1, 1, 1, 0, 0
4769 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4770 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4771 Initialise(&masm, p0.VnB(), pg_in);
4772 PRegisterM pg = p0.Merging();
4773
4774 // These are merging operations, so we have to initialise the result register.
4775 // We use a mixture of constructive and destructive operations.
4776
4777 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004778 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004779 __ Mov(z30, z31);
4780
4781 // For constructive operations, use a different initial result value.
4782 __ Index(z29.VnB(), 0, -1);
4783
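  // `Cls` counts leading sign bits (the bits after the sign bit that match
  // it), `Clz` counts leading zero bits, and `Cnt` counts the set bits in
  // each active element. For example, cls of a zero byte is 7 and clz of a
  // zero halfword is 16, as reflected in the expected values below.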
4784 __ Mov(z0, z29);
4785 __ Cls(z0.VnB(), pg, z31.VnB());
4786 __ Mov(z1, z31);
4787 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
4788 __ Mov(z2, z29);
4789 __ Cnt(z2.VnS(), pg, z31.VnS());
4790 __ Mov(z3, z31);
4791 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
4792
4793 END();
4794
4795 if (CAN_RUN()) {
4796 RUN();
4797 // Check that non-destructive operations preserve their inputs.
4798 ASSERT_EQUAL_SVE(z30, z31);
4799
4800 // clang-format off
4801
4802 // cls (B)
4803 uint8_t expected_z0[] =
4804 // pg: 0 0 0 0 1 0 1 1
4805 // pg: 1 0 0 1 0 1 1 1
4806 // pg: 0 0 1 0 1 1 1 0
4807 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
4808 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
4809 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
4810 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
4811
4812 // clz (H) destructive
4813 uint16_t expected_z1[] =
4814 // pg: 0 0 0 1
4815 // pg: 0 1 1 1
4816 // pg: 0 0 1 0
4817 {0x0000, 0x0000, 0x0000, 16,
4818 0xfefc, 0, 0, 0,
4819 0x1234, 0x5678, 0, 0xdef0};
4820 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
4821
4822 // cnt (S)
4823 uint32_t expected_z2[] =
4824 // pg: 0 1
4825 // pg: 1 1
4826 // pg: 0 0
4827 {0xe9eaebec, 0,
4828 22, 16,
4829 0xf9fafbfc, 0xfdfeff00};
4830 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
4831
4832 // cnt (D) destructive
4833 uint64_t expected_z3[] =
4834 // pg: 1 1 0
4835 { 0, 38, 0x123456789abcdef0};
4836 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4837
4838 // clang-format on
4839 }
4840}
4841
4842TEST_SVE(sve_sxt) {
4843 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4844 START();
4845
4846 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4847
4848 // For simplicity, we re-use the same pg for various lane sizes.
4849 // For D lanes: 1, 1, 0
4850 // For S lanes: 1, 1, 1, 0, 0
4851 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4852 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4853 Initialise(&masm, p0.VnB(), pg_in);
4854 PRegisterM pg = p0.Merging();
4855
4856 // These are merging operations, so we have to initialise the result register.
4857 // We use a mixture of constructive and destructive operations.
4858
4859 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004860 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004861 __ Mov(z30, z31);
4862
4863 // For constructive operations, use a different initial result value.
4864 __ Index(z29.VnB(), 0, -1);
4865
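  // Sxtb, Sxth and Sxtw sign-extend the low 8, 16 or 32 bits of each active
  // element to the full lane width; inactive lanes are unchanged (merging).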
4866 __ Mov(z0, z31);
4867 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
4868 __ Mov(z1, z29);
4869 __ Sxtb(z1.VnS(), pg, z31.VnS());
4870 __ Mov(z2, z31);
4871 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
4872 __ Mov(z3, z29);
4873 __ Sxth(z3.VnS(), pg, z31.VnS());
4874 __ Mov(z4, z31);
4875 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
4876 __ Mov(z5, z29);
4877 __ Sxtw(z5.VnD(), pg, z31.VnD());
4878
4879 END();
4880
4881 if (CAN_RUN()) {
4882 RUN();
4883 // Check that constructive operations preserve their inputs.
4884 ASSERT_EQUAL_SVE(z30, z31);
4885
4886 // clang-format off
4887
4888 // Sxtb (H) destructive
4889 uint64_t expected_z0[] =
4890 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4891 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
4892 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4893
4894 // Sxtb (S)
4895 uint64_t expected_z1[] =
4896 // pg: 0 1 1 1 0 0
4897 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
4898 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4899
4900 // Sxtb (D) destructive
4901 uint64_t expected_z2[] =
4902 // pg: 1 1 0
4903 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
4904 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4905
4906 // Sxth (S)
4907 uint64_t expected_z3[] =
4908 // pg: 0 1 1 1 0 0
4909 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
4910 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4911
4912 // Sxth (D) destructive
4913 uint64_t expected_z4[] =
4914 // pg: 1 1 0
4915 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
4916 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4917
4918 // Sxtw (D)
4919 uint64_t expected_z5[] =
4920 // pg: 1 1 0
4921 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
4922 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4923
4924 // clang-format on
4925 }
4926}
4927
4928TEST_SVE(sve_uxt) {
4929 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4930 START();
4931
4932 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4933
4934 // For simplicity, we re-use the same pg for various lane sizes.
4935 // For D lanes: 1, 1, 0
4936 // For S lanes: 1, 1, 1, 0, 0
4937 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4938 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4939 Initialise(&masm, p0.VnB(), pg_in);
4940 PRegisterM pg = p0.Merging();
4941
4942 // These are merging operations, so we have to initialise the result register.
4943 // We use a mixture of constructive and destructive operations.
4944
4945 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004946 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004947 __ Mov(z30, z31);
4948
4949 // For constructive operations, use a different initial result value.
4950 __ Index(z29.VnB(), 0, -1);
4951
4952 __ Mov(z0, z29);
4953 __ Uxtb(z0.VnH(), pg, z31.VnH());
4954 __ Mov(z1, z31);
4955 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
4956 __ Mov(z2, z29);
4957 __ Uxtb(z2.VnD(), pg, z31.VnD());
4958 __ Mov(z3, z31);
4959 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
4960 __ Mov(z4, z29);
4961 __ Uxth(z4.VnD(), pg, z31.VnD());
4962 __ Mov(z5, z31);
4963 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
4964
4965 END();
4966
4967 if (CAN_RUN()) {
4968 RUN();
4969 // clang-format off
4970
4971 // Uxtb (H)
4972 uint64_t expected_z0[] =
4973 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4974 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
4975 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4976
4977 // Uxtb (S) destructive
4978 uint64_t expected_z1[] =
4979 // pg: 0 1 1 1 0 0
4980 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
4981 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4982
4983 // Uxtb (D)
4984 uint64_t expected_z2[] =
4985 // pg: 1 1 0
4986 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
4987 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4988
4989 // Uxth (S) destructive
4990 uint64_t expected_z3[] =
4991 // pg: 0 1 1 1 0 0
4992 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
4993 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4994
4995 // Uxth (D)
4996 uint64_t expected_z4[] =
4997 // pg: 1 1 0
4998 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
4999 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5000
5001 // Uxtw (D) destructive
5002 uint64_t expected_z5[] =
5003 // pg: 1 1 0
5004 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5005 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5006
5007 // clang-format on
5008 }
5009}
5010
5011TEST_SVE(sve_abs_neg) {
5012 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5013 START();
5014
5015 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5016
5017 // For simplicity, we re-use the same pg for various lane sizes.
5018 // For D lanes: 1, 1, 0
5019 // For S lanes: 1, 1, 1, 0, 0
5020 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5021 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5022 Initialise(&masm, p0.VnB(), pg_in);
5023 PRegisterM pg = p0.Merging();
5024
5027 // These are merging operations, so we have to initialise the result register.
5028 // We use a mixture of constructive and destructive operations.
5029
5030 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005031 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005032 __ Mov(z30, z31);
5033
5034 // For constructive operations, use a different initial result value.
5035 __ Index(z29.VnB(), 0, -1);
5036
5037 __ Mov(z0, z31);
5038 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5039 __ Mov(z1, z29);
5040 __ Abs(z1.VnB(), pg, z31.VnB());
5041
5042 __ Mov(z2, z31);
5043 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5044 __ Mov(z3, z29);
5045 __ Neg(z3.VnS(), pg, z31.VnS());
5046
Jacob Bramleyc0066272019-09-30 16:30:47 +01005047 // The unpredicated form of `Neg` is implemented using `subr`.
5048 __ Mov(z4, z31);
5049 __ Neg(z4.VnB(), z4.VnB()); // destructive
5050 __ Mov(z5, z29);
5051 __ Neg(z5.VnD(), z31.VnD());
5052
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005053 END();
5054
5055 if (CAN_RUN()) {
5056 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005057
5058 ASSERT_EQUAL_SVE(z30, z31);
5059
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005060 // clang-format off
5061
5062 // Abs (D) destructive
5063 uint64_t expected_z0[] =
5064 // pg: 1 1 0
5065 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5066 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5067
5068 // Abs (B)
5069 uint64_t expected_z1[] =
5070 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5071 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5072 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5073
5074 // Neg (H) destructive
5075 uint64_t expected_z2[] =
5076 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5077 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5078 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5079
5080 // Neg (S)
5081 uint64_t expected_z3[] =
5082 // pg: 0 1 1 1 0 0
5083 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5084 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5085
Jacob Bramleyc0066272019-09-30 16:30:47 +01005086 // Neg (B) destructive, unpredicated
5087 uint64_t expected_z4[] =
5088 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5089 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5090
5091 // Neg (D) unpredicated
5092 uint64_t expected_z5[] =
5093 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5094 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5095
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005096 // clang-format on
5097 }
5098}
5099
Jacob Bramley0093bb92019-10-04 15:54:10 +01005100TEST_SVE(sve_cpy) {
5101 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5102 START();
5103
5104 // For simplicity, we re-use the same pg for various lane sizes.
5105 // For D lanes: 0, 1, 1
5106 // For S lanes: 0, 1, 1, 0, 1
5107 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5108 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5109
5110 PRegisterM pg = p7.Merging();
5111 Initialise(&masm, pg.VnB(), pg_in);
5112
5113 // These are merging operations, so we have to initialise the result registers
5114 // for each operation.
5115 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5116 __ Index(ZRegister(i, kBRegSize), 0, -1);
5117 }
5118
5119 // Recognisable values to copy.
5120 __ Mov(x0, 0xdeadbeefdeadbe42);
5121 __ Mov(x1, 0xdeadbeefdead8421);
5122 __ Mov(x2, 0xdeadbeef80042001);
5123 __ Mov(x3, 0x8000000420000001);
5124
5125 // Use NEON moves, to avoid testing SVE `cpy` against itself.
5126 __ Dup(v28.V2D(), x0);
5127 __ Dup(v29.V2D(), x1);
5128 __ Dup(v30.V2D(), x2);
5129 __ Dup(v31.V2D(), x3);
5130
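  // `Cpy` broadcasts a scalar (from a general-purpose register, the stack
  // pointer or a SIMD & FP register) into every active lane, leaving
  // inactive lanes unchanged under merging predication.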
5131 // Register forms (CPY_z_p_r)
5132 __ Cpy(z0.VnB(), pg, w0);
5133 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5134 __ Cpy(z2.VnS(), pg, w2);
5135 __ Cpy(z3.VnD(), pg, x3);
5136
5137 // VRegister forms (CPY_z_p_v)
5138 __ Cpy(z4.VnB(), pg, b28);
5139 __ Cpy(z5.VnH(), pg, h29);
5140 __ Cpy(z6.VnS(), pg, s30);
5141 __ Cpy(z7.VnD(), pg, d31);
5142
5143 // Check that we can copy the stack pointer.
5144 __ Mov(x10, sp);
5145 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5146 __ Cpy(z16.VnB(), pg, sp);
5147 __ Cpy(z17.VnH(), pg, wsp);
5148 __ Cpy(z18.VnS(), pg, wsp);
5149 __ Cpy(z19.VnD(), pg, sp);
5150 __ Mov(sp, x10); // Restore sp.
5151
5152 END();
5153
5154 if (CAN_RUN()) {
5155 RUN();
5156 // clang-format off
5157
5158 uint64_t expected_b[] =
5159 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5160 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5161 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5162 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5163
5164 uint64_t expected_h[] =
5165 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5166 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5167 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5168 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5169
5170 uint64_t expected_s[] =
5171 // pg: 0 0 1 1 0 1
5172 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5173 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5174 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5175
5176 uint64_t expected_d[] =
5177 // pg: 0 1 1
5178 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5179 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5180 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5181
5182
5183 uint64_t expected_b_sp[] =
5184 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5185 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5186 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5187
5188 uint64_t expected_h_sp[] =
5189 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5190 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5191 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5192
5193 uint64_t expected_s_sp[] =
5194 // pg: 0 0 1 1 0 1
5195 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5196 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5197
5198 uint64_t expected_d_sp[] =
5199 // pg: 0 1 1
5200 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5201 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5202
5203 // clang-format on
5204 }
5205}
5206
TatWai Chong4f28df72019-08-14 17:50:30 -07005207TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
5208 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5209 START();
5210
5211 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
5212
5213 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
5214
5215 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
5216
5217 int index_s[] = {1, 3, 2, 31, -1};
5218
5219 int index_d[] = {31, 1};
5220
5221 // Initialise the register with a value that doesn't exist in the table.
5222 __ Dup(z9.VnB(), 0x1f);
5223 InsrHelper(&masm, z9.VnD(), table_inputs);
5224
5225 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
5226 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
5227 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
5228 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
5229
5230 InsrHelper(&masm, ind_b, index_b);
5231 InsrHelper(&masm, ind_h, index_h);
5232 InsrHelper(&masm, ind_s, index_s);
5233 InsrHelper(&masm, ind_d, index_d);
5234
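  // Each result element of `Tbl` is the source element selected by the
  // corresponding index; an index greater than or equal to the number of
  // elements in the vector yields zero. The checks below account for this by
  // comparing each index against the vector length.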
5235 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
5236
5237 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
5238
5239 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
5240
5241 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
5242
5243 END();
5244
5245 if (CAN_RUN()) {
5246 RUN();
5247
5248 // clang-format off
5249 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
5250 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
5251
5252 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
5253 0x5544, 0x7766, 0xddcc, 0x9988};
5254
5255 unsigned z28_expected[] =
5256 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
5257
5258 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
5259 // clang-format on
5260
5261 unsigned vl = config->sve_vl_in_bits();
5262 for (size_t i = 0; i < ArrayLength(index_b); i++) {
5263 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
5264 if (!core.HasSVELane(z26.VnB(), lane)) break;
5265 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
5266 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
5267 }
5268
5269 for (size_t i = 0; i < ArrayLength(index_h); i++) {
5270 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
5271 if (!core.HasSVELane(z27.VnH(), lane)) break;
5272 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
5273 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
5274 }
5275
5276 for (size_t i = 0; i < ArrayLength(index_s); i++) {
5277 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
5278 if (!core.HasSVELane(z28.VnS(), lane)) break;
5279 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
5280 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
5281 }
5282
5283 for (size_t i = 0; i < ArrayLength(index_d); i++) {
5284 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
5285 if (!core.HasSVELane(z29.VnD(), lane)) break;
5286 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
5287 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
5288 }
5289 }
5290}
5291
Jacob Bramley199339d2019-08-05 18:49:13 +01005292TEST_SVE(ldr_str_z_bi) {
5293 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5294 START();
5295
5296 int vl = config->sve_vl_in_bytes();
5297
5298 // The immediate can address [-256, 255] times the VL, so allocate enough
5299 // space to exceed that in both directions.
5300 int data_size = vl * 1024;
5301
5302 uint8_t* data = new uint8_t[data_size];
5303 memset(data, 0, data_size);
5304
5305 // Set the base half-way through the buffer so we can use negative indices.
5306 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5307
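  // An SVE_MUL_VL operand addresses `base + imm * VL` bytes, and the
  // encodable immediate is a signed multiple of VL in [-256, 255]. For
  // example, `Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL))` stores z2 two whole
  // vector lengths above x0; anything else is synthesised via `Adr` below.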
5308 __ Index(z1.VnB(), 1, 3);
5309 __ Index(z2.VnB(), 2, 5);
5310 __ Index(z3.VnB(), 3, 7);
5311 __ Index(z4.VnB(), 4, 11);
5312 __ Index(z5.VnB(), 5, 13);
5313 __ Index(z6.VnB(), 6, 2);
5314 __ Index(z7.VnB(), 7, 3);
5315 __ Index(z8.VnB(), 8, 5);
5316 __ Index(z9.VnB(), 9, 7);
5317
5318 // Encodable cases.
5319 __ Str(z1, SVEMemOperand(x0));
5320 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
5321 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
5322 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
5323 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
5324
5325 // Cases that fall back on `Adr`.
5326 __ Str(z6, SVEMemOperand(x0, 6 * vl));
5327 __ Str(z7, SVEMemOperand(x0, -7 * vl));
5328 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
5329 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
5330
5331 // Corresponding loads.
5332 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
5333 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
5334 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
5335 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
5336 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
5337
5338 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
5339 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
5340 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
5341 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
5342
5343 END();
5344
5345 if (CAN_RUN()) {
5346 RUN();
5347
5348 uint8_t* expected = new uint8_t[data_size];
5349 memset(expected, 0, data_size);
5350 uint8_t* middle = &expected[data_size / 2];
5351
5352 for (int i = 0; i < vl; i++) {
5353 middle[i] = (1 + (3 * i)) & 0xff; // z1
5354 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
5355 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
5356 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
5357 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
5358 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
5359 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
5360 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
5361 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
5362 }
5363
Jacob Bramley33c99f92019-10-08 15:24:12 +01005364 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005365
5366 ASSERT_EQUAL_SVE(z1, z11);
5367 ASSERT_EQUAL_SVE(z2, z12);
5368 ASSERT_EQUAL_SVE(z3, z13);
5369 ASSERT_EQUAL_SVE(z4, z14);
5370 ASSERT_EQUAL_SVE(z5, z15);
5371 ASSERT_EQUAL_SVE(z6, z16);
5372 ASSERT_EQUAL_SVE(z7, z17);
5373 ASSERT_EQUAL_SVE(z8, z18);
5374 ASSERT_EQUAL_SVE(z9, z19);
5375
5376 delete[] expected;
5377 }
5378 delete[] data;
5379}
5380
5381TEST_SVE(ldr_str_p_bi) {
5382 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5383 START();
5384
5385 int vl = config->sve_vl_in_bytes();
5386 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5387 int pl = vl / kZRegBitsPerPRegBit;
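  // A predicate register has one bit for each byte of a Z register, so the
  // predicate length (PL) is an eighth of the vector length (VL).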
5388
5389 // The immediate can address [-256, 255] times the PL, so allocate enough
5390 // space to exceed that in both directions.
5391 int data_size = pl * 1024;
5392
5393 uint8_t* data = new uint8_t[data_size];
5394 memset(data, 0, data_size);
5395
5396 // Set the base half-way through the buffer so we can use negative indices.
5397 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5398
5399 uint64_t pattern[4] = {0x1010101011101111,
5400 0x0010111011000101,
5401 0x1001101110010110,
5402 0x1010110101100011};
5403 for (int i = 8; i <= 15; i++) {
5404 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
5405 Initialise(&masm,
5406 PRegister(i),
5407 pattern[3] * i,
5408 pattern[2] * i,
5409 pattern[1] * i,
5410 pattern[0] * i);
5411 }
5412
5413 // Encodable cases.
5414 __ Str(p8, SVEMemOperand(x0));
5415 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
5416 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
5417 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
5418
5419 // Cases that fall back on `Adr`.
5420 __ Str(p12, SVEMemOperand(x0, 6 * pl));
5421 __ Str(p13, SVEMemOperand(x0, -7 * pl));
5422 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
5423 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
5424
5425 // Corresponding loads.
5426 __ Ldr(p0, SVEMemOperand(x0));
5427 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
5428 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
5429 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
5430
5431 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
5432 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
5433 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
5434 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
5435
5436 END();
5437
5438 if (CAN_RUN()) {
5439 RUN();
5440
5441 uint8_t* expected = new uint8_t[data_size];
5442 memset(expected, 0, data_size);
5443 uint8_t* middle = &expected[data_size / 2];
5444
5445 for (int i = 0; i < pl; i++) {
5446 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
5447 size_t index = i / sizeof(pattern[0]);
5448 VIXL_ASSERT(index < ArrayLength(pattern));
5449 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
5450 // Each byte of `pattern` can be multiplied by 15 without carry.
5451 VIXL_ASSERT((byte * 15) <= 0xff);
5452
5453 middle[i] = byte * 8; // p8
5454 middle[(2 * pl) + i] = byte * 9; // p9
5455 middle[(-3 * pl) + i] = byte * 10; // p10
5456 middle[(255 * pl) + i] = byte * 11; // p11
5457 middle[(6 * pl) + i] = byte * 12; // p12
5458 middle[(-7 * pl) + i] = byte * 13; // p13
5459 middle[(314 * pl) + i] = byte * 14; // p14
5460 middle[(-314 * pl) + i] = byte * 15; // p15
5461 }
5462
Jacob Bramley33c99f92019-10-08 15:24:12 +01005463 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005464
5465 ASSERT_EQUAL_SVE(p0, p8);
5466 ASSERT_EQUAL_SVE(p1, p9);
5467 ASSERT_EQUAL_SVE(p2, p10);
5468 ASSERT_EQUAL_SVE(p3, p11);
5469 ASSERT_EQUAL_SVE(p4, p12);
5470 ASSERT_EQUAL_SVE(p5, p13);
5471 ASSERT_EQUAL_SVE(p6, p14);
5472 ASSERT_EQUAL_SVE(p7, p15);
5473
5474 delete[] expected;
5475 }
5476 delete[] data;
5477}
5478
Jacob Bramleye668b202019-08-14 17:57:34 +01005479template <typename T>
5480static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
5481 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
5482}
5483
5484TEST_SVE(sve_ld1_st1_contiguous) {
5485 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5486 START();
5487
5488 int vl = config->sve_vl_in_bytes();
5489
5490 // The immediate can address [-8, 7] times the VL, so allocate enough space to
5491 // exceed that in both directions.
5492 int data_size = vl * 128;
5493
5494 uint8_t* data = new uint8_t[data_size];
5495 memset(data, 0, data_size);
5496
5497 // Set the base half-way through the buffer so we can use negative indeces.
5498 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5499
Jacob Bramleye668b202019-08-14 17:57:34 +01005500 // Encodable scalar-plus-immediate cases.
5501 __ Index(z1.VnB(), 1, -3);
5502 __ Ptrue(p1.VnB());
5503 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
5504
5505 __ Index(z2.VnH(), -2, 5);
5506 __ Ptrue(p2.VnH(), SVE_MUL3);
5507 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
5508
5509 __ Index(z3.VnS(), 3, -7);
5510 __ Ptrue(p3.VnS(), SVE_POW2);
5511 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
5512
5513 // Encodable scalar-plus-scalar cases.
5514 __ Index(z4.VnD(), -4, 11);
5515 __ Ptrue(p4.VnD(), SVE_VL3);
5516 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
5517 __ Mov(x2, 17);
5518 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
5519
5520 __ Index(z5.VnD(), 6, -2);
5521 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01005522 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
5523 __ Mov(x4, 6);
5524 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
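  // In the scalar-plus-scalar form the index register is scaled by the
  // memory element size, so `LSL, 3` above addresses x3 + (x4 * 8) bytes.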
Jacob Bramleye668b202019-08-14 17:57:34 +01005525
5526 // Unencodable cases fall back on `Adr`.
5527 __ Index(z6.VnS(), -7, 3);
5528 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
5529 // predicate bits when handling larger lanes.
5530 __ Ptrue(p6.VnB(), SVE_ALL);
5531 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
5532
TatWai Chong6205eb42019-09-24 10:07:20 +01005533 __ Index(z7.VnD(), 32, -11);
5534 __ Ptrue(p7.VnD(), SVE_MUL4);
5535 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01005536
TatWai Chong6205eb42019-09-24 10:07:20 +01005537 // Corresponding loads.
5538 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
5539 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5540 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5541 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5542 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
5543 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
5544
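  // Ld1sb, Ld1sh and Ld1sw sign-extend each loaded element to the lane
  // width, whereas Ld1b, Ld1h and Ld1w zero-extend.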
5545 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5546 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5547 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5548 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
5549
5550 // We can test ld1 by comparing the value loaded with the value stored. In
5551 // most cases, there are two complications:
5552 // - Loads have zeroing predication, so we have to clear the inactive
5553 // elements on our reference.
5554 // - We have to replicate any sign- or zero-extension.
5555
5556 // Ld1b(z8.VnB(), ...)
5557 __ Dup(z18.VnB(), 0);
5558 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
5559
5560 // Ld1b(z9.VnH(), ...)
5561 __ Dup(z19.VnH(), 0);
5562 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
5563
5564 // Ld1h(z10.VnS(), ...)
5565 __ Dup(z20.VnS(), 0);
5566 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
5567
5568 // Ld1b(z11.VnD(), ...)
5569 __ Dup(z21.VnD(), 0);
5570 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
5571
5572 // Ld1d(z12.VnD(), ...)
5573 __ Dup(z22.VnD(), 0);
5574 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
5575
5576 // Ld1w(z13.VnS(), ...)
5577 __ Dup(z23.VnS(), 0);
5578 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
5579
5580 // Ld1sb(z14.VnH(), ...)
5581 __ Dup(z24.VnH(), 0);
5582 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
5583
5584 // Ld1sh(z15.VnS(), ...)
5585 __ Dup(z25.VnS(), 0);
5586 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
5587
5588 // Ld1sb(z16.VnD(), ...)
5589 __ Dup(z26.VnD(), 0);
5590 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
5591
5592 // Ld1sw(z17.VnD(), ...)
5593 __ Dup(z27.VnD(), 0);
5594 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01005595
5596 END();
5597
5598 if (CAN_RUN()) {
5599 RUN();
5600
5601 uint8_t* expected = new uint8_t[data_size];
5602 memset(expected, 0, data_size);
5603 uint8_t* middle = &expected[data_size / 2];
5604
5605 int vl_b = vl / kBRegSizeInBytes;
5606 int vl_h = vl / kHRegSizeInBytes;
5607 int vl_s = vl / kSRegSizeInBytes;
5608 int vl_d = vl / kDRegSizeInBytes;
5609
5610 // Encodable cases.
5611
5612 // st1b { z1.b }, SVE_ALL
5613 for (int i = 0; i < vl_b; i++) {
5614 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
5615 }
5616
5617 // st1b { z2.h }, SVE_MUL3
5618 int vl_h_mul3 = vl_h - (vl_h % 3);
5619 for (int i = 0; i < vl_h_mul3; i++) {
5620 MemoryWrite(middle, 7 * vl, i, static_cast<uint8_t>(-2 + (5 * i)));
5621 }
5622
5623 // st1h { z3.s }, SVE_POW2
5624 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
5625 for (int i = 0; i < vl_s_pow2; i++) {
5626 MemoryWrite(middle, -8 * vl, i, static_cast<uint16_t>(3 - (7 * i)));
5627 }
5628
5629 // st1b { z4.d }, SVE_VL3
5630 if (vl_d >= 3) {
5631 for (int i = 0; i < 3; i++) {
5632 MemoryWrite(middle,
5633 (8 * vl) + 17,
5634 i,
5635 static_cast<uint8_t>(-4 + (11 * i)));
5636 }
5637 }
5638
5639 // st1d { z5.d }, SVE_VL16
5640 if (vl_d >= 16) {
5641 for (int i = 0; i < 16; i++) {
5642 MemoryWrite(middle,
5643 (10 * vl) + (6 * kDRegSizeInBytes),
5644 i,
5645 static_cast<uint64_t>(6 - (2 * i)));
5646 }
5647 }
5648
5649 // Unencodable cases.
5650
5651 // st1w { z6.s }, SVE_ALL
5652 for (int i = 0; i < vl_s; i++) {
5653 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
5654 }
5655
TatWai Chong6205eb42019-09-24 10:07:20 +01005656 // st1w { z7.d }, SVE_MUL4
5657 int vl_d_mul4 = vl_d - (vl_d % 4);
5658 for (int i = 0; i < vl_d_mul4; i++) {
5659 MemoryWrite(middle, 22 * vl, i, static_cast<uint32_t>(32 + (-11 * i)));
5660 }
5661
Jacob Bramley33c99f92019-10-08 15:24:12 +01005662 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01005663
TatWai Chong6205eb42019-09-24 10:07:20 +01005664 // Check that we loaded back the expected values.
5665
5666 ASSERT_EQUAL_SVE(z18, z8);
5667 ASSERT_EQUAL_SVE(z19, z9);
5668 ASSERT_EQUAL_SVE(z20, z10);
5669 ASSERT_EQUAL_SVE(z21, z11);
5670 ASSERT_EQUAL_SVE(z22, z12);
5671 ASSERT_EQUAL_SVE(z23, z13);
5672 ASSERT_EQUAL_SVE(z24, z14);
5673 ASSERT_EQUAL_SVE(z25, z15);
5674 ASSERT_EQUAL_SVE(z26, z16);
5675 ASSERT_EQUAL_SVE(z27, z17);
5676
Jacob Bramleye668b202019-08-14 17:57:34 +01005677 delete[] expected;
5678 }
5679 delete[] data;
5680}
5681
TatWai Chong6995bfd2019-09-26 10:48:05 +01005682typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
5683 const ZRegister& zn,
5684 const IntegerOperand imm);
5685
5686template <typename F, typename Td, typename Tn>
5687static void IntWideImmHelper(Test* config,
5688 F macro,
5689 unsigned lane_size_in_bits,
5690 const Tn& zn_inputs,
5691 IntegerOperand imm,
5692 const Td& zd_expected) {
5693 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5694 START();
5695
5696 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
5697 InsrHelper(&masm, zd1, zn_inputs);
5698
5699 // Also test with a different zn, to test the movprfx case.
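  // When zd and zn differ, destructive forms are expected to be emitted as a
  // `movprfx` (copying zn into zd) followed by the destructive instruction,
  // so the result must match the zd1 case and zn must be left unchanged.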
5700 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
5701 InsrHelper(&masm, zn, zn_inputs);
5702 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
5703 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
5704
5705 // Make a copy so we can check that constructive operations preserve zn.
5706 __ Mov(zn_copy, zn);
5707
5708 {
5709 UseScratchRegisterScope temps(&masm);
5710 // The MacroAssembler needs a P scratch register for some of these macros,
5711 // and it doesn't have one by default.
5712 temps.Include(p3);
5713
5714 (masm.*macro)(zd1, zd1, imm);
5715 (masm.*macro)(zd2, zn, imm);
5716 }
5717
5718 END();
5719
5720 if (CAN_RUN()) {
5721 RUN();
5722
5723 ASSERT_EQUAL_SVE(zd_expected, zd1);
5724
5725 // Check the result from `instr` with movprfx is the same as
5726 // the immediate version.
5727 ASSERT_EQUAL_SVE(zd_expected, zd2);
5728
5729 ASSERT_EQUAL_SVE(zn_copy, zn);
5730 }
5731}
5732
5733TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
5734 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
5735 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
5736 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
5737 int64_t in_d[] = {1, 10, 10000, 1000000};
5738
5739 IntWideImmFn fn = &MacroAssembler::Smax;
5740
5741 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
5742 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
5743 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
5744 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
5745
5746 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
5747 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
5748 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
5749 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5750
5751 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
5752 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
5753 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
5754
5755 // The immediate is in the range [-128, 127], but the macro is able to
5756 // synthesise unencodable immediates.
5757 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5758 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5759 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5760 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5761}
5762
5763TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
5764 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
5765 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
5766 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
5767 int64_t in_d[] = {1, 10, 10000, 1000000};
5768
5769 IntWideImmFn fn = &MacroAssembler::Smin;
5770
5771 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
5772 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
5773 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
5774 int64_t exp_d_1[] = {1, 10, 99, 99};
5775
5776 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
5777 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
5778 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
5779 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5780
5781 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
5782 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
5783 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
5784
5785 // The immediate is in the range [-128, 127], but the macro is able to
5786 // synthesise unencodable immediates.
5787 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5788 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5789 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5790 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5791}
5792
5793TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
5794 int in_b[] = {0, 255, 127, 0x80, 1, 55};
5795 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
5796 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
5797 int64_t in_d[] = {1, 10, 10000, 1000000};
5798
5799 IntWideImmFn fn = &MacroAssembler::Umax;
5800
5801 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
5802 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
5803 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
5804 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
5805
5806 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
5807 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
5808 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
5809 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5810
5811 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
5812 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
5813 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
5814
5815 // The immediate is in the range [0, 255], but the macro is able to
5816 // synthesise unencodable immediates.
5817 // B-sized lanes cannot take an immediate out of the range [0, 255].
5818 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
5819 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5820 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5821}
5822
5823TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
5824 int in_b[] = {0, 255, 127, 0x80, 1, 55};
5825 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
5826 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
5827 int64_t in_d[] = {1, 10, 10000, 1000000};
5828
5829 IntWideImmFn fn = &MacroAssembler::Umin;
5830
5831 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
5832 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
5833 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
5834 int64_t exp_d_1[] = {1, 10, 99, 99};
5835
5836 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
5837 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
5838 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
5839 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
5840
5841 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
5842 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
5843 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
5844
5845 // The immediate is in the range [0, 255], but the macro is able to
5846 // synthesise unencodable immediates.
5847 // B-sized lanes cannot take an immediate out of the range [0, 255].
5848 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
5849 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5850 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
5851}
5852
5853TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
5854 int in_b[] = {11, -1, 7, -3};
5855 int in_h[] = {111, -1, 17, -123};
5856 int in_s[] = {11111, -1, 117, -12345};
5857 int64_t in_d[] = {0x7fffffff, 0x80000000};
5858
5859 IntWideImmFn fn = &MacroAssembler::Mul;
5860
5861 int exp_b_1[] = {66, -6, 42, -18};
5862 int exp_h_1[] = {-14208, 128, -2176, 15744};
5863 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
5864 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
5865
5866 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
5867 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
5868 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5869 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
5870
5871 int exp_h_2[] = {-28305, 255, -4335, 31365};
5872 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
5873 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
5874
5875 // The immediate is in the range [-128, 127], but the macro is able to
5876 // synthesise unencodable immediates.
5877 // B-sized lanes cannot take an immediate out of the range [-128, 127].
5878 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
5879 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
5880 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
5881
5882 // Integer overflow on multiplication.
5883 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
5884
5885 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
5886}
5887
5888TEST_SVE(sve_int_wide_imm_unpredicated_add) {
5889 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5890 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5891 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5892 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5893
5894 IntWideImmFn fn = &MacroAssembler::Add;
5895
5896 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
5897 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5898 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5899 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5900
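  // `Add` and `Sub` encode an unsigned eight-bit immediate, optionally
  // shifted left by eight; anything else has to be synthesised by the macro.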
5901 // Encodable with `add` (shift 0).
5902 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5903 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5904 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5905 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5906
5907 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
5908 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5909 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5910
5911 // Encodable with `add` (shift 8).
5912 // B-sized lanes cannot take a shift of 8.
5913 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5914 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5915 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
5916
5917 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
5918
5919 // The macro is able to synthesise unencodable immediates.
5920 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01005921
5922 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
5923 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
5924 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
5925 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
5926
5927 // Negative immediates use `sub`.
5928 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
5929 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
5930 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
5931 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005932}
5933
5934TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
5935 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5936 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5937 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5938 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5939
5940 IntWideImmFn fn = &MacroAssembler::Sqadd;
5941
Jacob Bramleyb28f6172019-10-02 12:12:35 +01005942 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01005943 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5944 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5945 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5946
5947 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01005948 // Note that encodable immediates are unsigned, even for signed saturation.
5949 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005950 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5951 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01005952 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005953
5954 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
5955 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5956 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5957
5958 // Encodable with `sqadd` (shift 8).
5959 // B-sized lanes cannot take a shift of 8.
5960 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5961 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5962 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005963}
5964
5965TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
5966 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5967 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5968 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5969 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
5970
5971 IntWideImmFn fn = &MacroAssembler::Uqadd;
5972
5973 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
5974 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
5975 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
5976 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
5977
5978 // Encodable with `uqadd` (shift 0).
5979 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
5980 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
5981 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
5982 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
5983
5984 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
5985 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
5986 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
5987
5988 // Encodable with `uqadd` (shift 8).
5989 // B-sized lanes cannot take a shift of 8.
5990 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
5991 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
5992 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005993}
5994
5995TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
5996 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
5997 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
5998 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
5999 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6000
6001 IntWideImmFn fn = &MacroAssembler::Sub;
6002
6003 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
6004 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6005 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6006 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6007
6008 // Encodable with `sub` (shift 0).
6009 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6010 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6011 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6012 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6013
6014 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6015 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6016 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6017
6018 // Encodable with `sub` (shift 8).
6019 // B-sized lanes cannot take a shift of 8.
6020 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6021 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6022 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6023
6024 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
6025
6026 // The macro is able to synthesise unencodable immediates.
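  // For example, with an S-sized immediate of 127 << 16 (0x007f0000):
  //   0x80018181 - 0x007f0000 = 0x7f828181.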
6027 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
6028
6029 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
6030 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
6031 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
6032 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
6033
6034 // Negative immediates use `add`.
6035 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
6036 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
6037 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
6038 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
6039}
6040
6041TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
6042 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6043 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6044 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6045 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6046
6047 IntWideImmFn fn = &MacroAssembler::Sqsub;
6048
6049 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
6050 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6051 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6052 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6053
6054 // Encodable with `sqsub` (shift 0).
6055 // Note that encodable immediates are unsigned, even for signed saturation.
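  // For example, in the B form: 0x7f (+127) - 129 = 0xfe (-2), while
  // 0x81 (-127) - 129 saturates to 0x80 (INT8_MIN).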
6056 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
6057 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6058 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6059 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
6060
6061 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
6062 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6063 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6064
6065 // Encodable with `sqsub` (shift 8).
6066 // B-sized lanes cannot take a shift of 8.
6067 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6068 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6069 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6070}
6071
6072TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
6073 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6074 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6075 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6076 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6077
6078 IntWideImmFn fn = &MacroAssembler::Uqsub;
6079
6080 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
6081 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6082 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6083 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6084
6085 // Encodable with `uqsub` (shift 0).
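  // For example, in the B form: 0xff - 0x81 = 0x7e, while 0x7f - 0x81 saturates
  // to 0x00.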
6086 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6087 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6088 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6089 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6090
6091 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6092 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6093 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6094
6095 // Encodable with `uqsub` (shift 8).
6096 // B-sized lanes cannot take a shift of 8.
6097 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6098 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6099 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6100}
6101
6102TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
6103 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6104 START();
6105
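  // `Sub(zd, imm, zn)` computes zd = imm - zn, so it maps onto the reversed
  // subtract, `subr`, when the immediate is encodable.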
6106 // Encodable with `subr` (shift 0).
6107 __ Index(z0.VnD(), 1, 1);
6108 __ Sub(z0.VnD(), 100, z0.VnD());
6109 __ Index(z1.VnS(), 0x7f, 1);
6110 __ Sub(z1.VnS(), 0xf7, z1.VnS());
6111 __ Index(z2.VnH(), 0xaaaa, 0x2222);
6112 __ Sub(z2.VnH(), 0x80, z2.VnH());
6113 __ Index(z3.VnB(), 133, 1);
6114 __ Sub(z3.VnB(), 255, z3.VnB());
6115
6116 // Encodable with `subr` (shift 8).
6117 __ Index(z4.VnD(), 256, -1);
6118 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
6119 __ Index(z5.VnS(), 0x7878, 1);
6120 __ Sub(z5.VnS(), 0x8000, z5.VnS());
6121 __ Index(z6.VnH(), 0x30f0, -1);
6122 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
6123 // B-sized lanes cannot take a shift of 8.
6124
6125 // Select with movprfx.
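  // The immediate form of `subr` is destructive (it reads and writes zdn), so a
  // distinct destination register is expected to be handled via a `movprfx` prefix.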
6126 __ Index(z31.VnD(), 256, 4001);
6127 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
6128
6129 // Out of the encodable immediate range of `sub`.
6130 __ Index(z30.VnS(), 0x11223344, 1);
6131 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
6132
6133 END();
6134
6135 if (CAN_RUN()) {
6136 RUN();
6137
6138 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
6139 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6140
6141 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
6142 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
6143
6144 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
6145 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
6146
6147 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
6148 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
6149
6150 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
6151 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6152
6153 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
6154 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
6155
6156 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
6157 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
6158
6159 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
6160 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6161
6162 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
6163 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
6164 }
6165}
6166
6167TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
6168 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6169 START();
6170
6171 // Immediates which can be encoded in the instructions.
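  // (As with `fmov`, the encodable immediates should be of the form
  // +/-(1 + f/16) * 2^e, with f in [0, 15] and e in [-3, 4].)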
6172 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
6173 __ Fdup(z1.VnS(), Float16(2.0));
6174 __ Fdup(z2.VnD(), Float16(3.875));
6175 __ Fdup(z3.VnH(), 8.0f);
6176 __ Fdup(z4.VnS(), -4.75f);
6177 __ Fdup(z5.VnD(), 0.5f);
6178 __ Fdup(z6.VnH(), 1.0);
6179 __ Fdup(z7.VnS(), 2.125);
6180 __ Fdup(z8.VnD(), -13.0);
6181
6182 // Immediates which cannot be encoded in the instructions.
6183 __ Fdup(z10.VnH(), Float16(0.0));
6184 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
6185 __ Fdup(z12.VnS(), 255.0f);
6186 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
6187 __ Fdup(z14.VnD(), 12.3456);
6188 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
6189
6190 END();
6191
6192 if (CAN_RUN()) {
6193 RUN();
6194
6195 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
6196 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
6197 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
6198 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
6199 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
6200 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
6201 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
6202 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
6203 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
6204
6205 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
6206 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
6207 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
6208 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
6209 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
6210 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
6211 }
6212}
6213
6214TEST_SVE(sve_andv_eorv_orv) {
6215 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6216 START();
6217
6218 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
6219 InsrHelper(&masm, z31.VnD(), in);
6220
6221 // For simplicity, we re-use the same pg for various lane sizes.
6222 // For D lanes: 1, 1, 0
6223 // For S lanes: 1, 1, 1, 0, 0
6224 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6225 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6226 Initialise(&masm, p0.VnB(), pg_in);
6227
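  // Each reduction combines only the active lanes with bitwise AND, EOR or OR,
  // writes the scalar result to the destination V register, and clears the rest
  // of the Z register.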
6228 // Make a copy so we can check that constructive operations preserve zn.
6229 __ Mov(z0, z31);
6230 __ Andv(b0, p0, z0.VnB()); // destructive
6231 __ Andv(h1, p0, z31.VnH());
6232 __ Mov(z2, z31);
6233 __ Andv(s2, p0, z2.VnS()); // destructive
6234 __ Andv(d3, p0, z31.VnD());
6235
6236 __ Eorv(b4, p0, z31.VnB());
6237 __ Mov(z5, z31);
6238 __ Eorv(h5, p0, z5.VnH()); // destructive
6239 __ Eorv(s6, p0, z31.VnS());
6240 __ Mov(z7, z31);
6241 __ Eorv(d7, p0, z7.VnD()); // destructive
6242
6243 __ Mov(z8, z31);
6244 __ Orv(b8, p0, z8.VnB()); // destructive
6245 __ Orv(h9, p0, z31.VnH());
6246 __ Mov(z10, z31);
6247 __ Orv(s10, p0, z10.VnS()); // destructive
6248 __ Orv(d11, p0, z31.VnD());
6249
6250 END();
6251
6252 if (CAN_RUN()) {
6253 RUN();
6254
6255 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6256 ASSERT_EQUAL_64(0x10, d0);
6257 ASSERT_EQUAL_64(0x1010, d1);
6258 ASSERT_EQUAL_64(0x33331111, d2);
6259 ASSERT_EQUAL_64(0x7777555533331111, d3);
6260 ASSERT_EQUAL_64(0xbf, d4);
6261 ASSERT_EQUAL_64(0xedcb, d5);
6262 ASSERT_EQUAL_64(0x44444444, d6);
6263 ASSERT_EQUAL_64(0x7777555533331111, d7);
6264 ASSERT_EQUAL_64(0xff, d8);
6265 ASSERT_EQUAL_64(0xffff, d9);
6266 ASSERT_EQUAL_64(0x77775555, d10);
6267 ASSERT_EQUAL_64(0x7777555533331111, d11);
6268 } else {
6269 ASSERT_EQUAL_64(0, d0);
6270 ASSERT_EQUAL_64(0x0010, d1);
6271 ASSERT_EQUAL_64(0x00110011, d2);
6272 ASSERT_EQUAL_64(0x0011001100110011, d3);
6273 ASSERT_EQUAL_64(0x62, d4);
6274 ASSERT_EQUAL_64(0x0334, d5);
6275 ASSERT_EQUAL_64(0x8899aabb, d6);
6276 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
6277 ASSERT_EQUAL_64(0xff, d8);
6278 ASSERT_EQUAL_64(0xffff, d9);
6279 ASSERT_EQUAL_64(0xffffffff, d10);
6280 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
6281 }
6282
6283 // Check the upper lanes above the top of the V register are all clear.
6284 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6285 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6286 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6287 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6288 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6289 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6290 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6291 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6292 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6293 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
6294 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
6295 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
6296 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
6297 }
6298 }
6299}
6300
6301
6302TEST_SVE(sve_saddv_uaddv) {
6303 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6304 START();
6305
6306 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
6307 InsrHelper(&masm, z31.VnD(), in);
6308
6309 // For simplicity, we re-use the same pg for various lane sizes.
6310 // For D lanes: 1, 1, 0
6311 // For S lanes: 1, 1, 1, 0, 0
6312 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6313 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6314 Initialise(&masm, p0.VnB(), pg_in);
6315
6316 // Make a copy so we can check that constructive operations preserve zn.
6317 __ Mov(z0, z31);
6318 __ Saddv(b0, p0, z0.VnB()); // destructive
6319 __ Saddv(h1, p0, z31.VnH());
6320 __ Mov(z2, z31);
6321 __ Saddv(s2, p0, z2.VnS()); // destructive
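  // Note: `saddv` is not available for D-sized source lanes (the 64-bit
  // accumulator makes sign extension redundant), so only `uaddv` is tested
  // with them below.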
6322
6323 __ Uaddv(b4, p0, z31.VnB());
6324 __ Mov(z5, z31);
6325 __ Uaddv(h5, p0, z5.VnH()); // destructive
6326 __ Uaddv(s6, p0, z31.VnS());
6327 __ Mov(z7, z31);
6328 __ Uaddv(d7, p0, z7.VnD()); // destructive
6329
6330 END();
6331
6332 if (CAN_RUN()) {
6333 RUN();
6334
6335 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6336 // Saddv
6337 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
6338 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
6339 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
6340 // Uaddv
6341 ASSERT_EQUAL_64(0x00000000000002a9, d4);
6342 ASSERT_EQUAL_64(0x0000000000019495, d5);
6343 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
6344 ASSERT_EQUAL_64(0x8182838485868788, d7);
6345 } else {
6346 // Saddv
6347 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
6348 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
6349 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
6350 // Uaddv
6351 ASSERT_EQUAL_64(0x0000000000000562, d4);
6352 ASSERT_EQUAL_64(0x0000000000028394, d5);
6353 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
6354 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
6355 }
6356
6357 // Check the upper lanes above the top of the V register are all clear.
6358 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6359 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6360 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6361 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6362 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6363 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6364 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6365 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6366 }
6367 }
6368}
6369
6370
6371TEST_SVE(sve_sminv_uminv) {
6372 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6373 START();
6374
6375 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6376 InsrHelper(&masm, z31.VnD(), in);
6377
6378 // For simplicity, we re-use the same pg for various lane sizes.
6379 // For D lanes: 1, 0, 1
6380 // For S lanes: 1, 1, 0, 0, 1
6381 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6382 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6383 Initialise(&masm, p0.VnB(), pg_in);
6384
6385 // Make a copy so we can check that constructive operations preserve zn.
6386 __ Mov(z0, z31);
6387 __ Sminv(b0, p0, z0.VnB()); // destructive
6388 __ Sminv(h1, p0, z31.VnH());
6389 __ Mov(z2, z31);
6390 __ Sminv(s2, p0, z2.VnS()); // destructive
6391 __ Sminv(d3, p0, z31.VnD());
6392
6393 __ Uminv(b4, p0, z31.VnB());
6394 __ Mov(z5, z31);
6395 __ Uminv(h5, p0, z5.VnH()); // destructive
6396 __ Uminv(s6, p0, z31.VnS());
6397 __ Mov(z7, z31);
6398 __ Uminv(d7, p0, z7.VnD()); // destructive
6399
6400 END();
6401
6402 if (CAN_RUN()) {
6403 RUN();
6404
6405 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6406 // Sminv
6407 ASSERT_EQUAL_64(0xaa, d0);
6408 ASSERT_EQUAL_64(0xaabb, d1);
6409 ASSERT_EQUAL_64(0xaabbfc00, d2);
6410 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The smaller lane is inactive.
6411 // Uminv
6412 ASSERT_EQUAL_64(0, d4);
6413 ASSERT_EQUAL_64(0x2233, d5);
6414 ASSERT_EQUAL_64(0x112233, d6);
6415 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The smaller lane is inactive.
6416 } else {
6417 // Sminv
6418 ASSERT_EQUAL_64(0xaa, d0);
6419 ASSERT_EQUAL_64(0xaaaa, d1);
6420 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
6421 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
6422 // Uminv
6423 ASSERT_EQUAL_64(0, d4);
6424 ASSERT_EQUAL_64(0x2233, d5);
6425 ASSERT_EQUAL_64(0x112233, d6);
6426 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
6427 }
6428
6429 // Check the upper lanes above the top of the V register are all clear.
6430 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6431 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6432 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6433 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6434 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6435 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6436 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6437 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6438 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6439 }
6440 }
6441}
6442
6443TEST_SVE(sve_smaxv_umaxv) {
6444 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6445 START();
6446
6447 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6448 InsrHelper(&masm, z31.VnD(), in);
6449
6450 // For simplicity, we re-use the same pg for various lane sizes.
6451 // For D lanes: 1, 0, 1
6452 // For S lanes: 1, 1, 0, 0, 1
6453 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6454 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6455 Initialise(&masm, p0.VnB(), pg_in);
6456
6457 // Make a copy so we can check that constructive operations preserve zn.
6458 __ Mov(z0, z31);
6459 __ Smaxv(b0, p0, z0.VnB()); // destructive
6460 __ Smaxv(h1, p0, z31.VnH());
6461 __ Mov(z2, z31);
6462 __ Smaxv(s2, p0, z2.VnS()); // destructive
6463 __ Smaxv(d3, p0, z31.VnD());
6464
6465 __ Umaxv(b4, p0, z31.VnB());
6466 __ Mov(z5, z31);
6467 __ Umaxv(h5, p0, z5.VnH()); // destructive
6468 __ Umaxv(s6, p0, z31.VnS());
6469 __ Mov(z7, z31);
6470 __ Umaxv(d7, p0, z7.VnD()); // destructive
6471
6472 END();
6473
6474 if (CAN_RUN()) {
6475 RUN();
6476
6477 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6478 // Smaxv
6479 ASSERT_EQUAL_64(0x33, d0);
6480 ASSERT_EQUAL_64(0x44aa, d1);
6481 ASSERT_EQUAL_64(0x112233, d2);
6482 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
6483 // Umaxv
6484 ASSERT_EQUAL_64(0xfe, d4);
6485 ASSERT_EQUAL_64(0xfc00, d5);
6486 ASSERT_EQUAL_64(0xaabbfc00, d6);
6487 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
6488 } else {
6489 // Smaxv
6490 ASSERT_EQUAL_64(0x33, d0);
6491 ASSERT_EQUAL_64(0x44aa, d1);
6492 ASSERT_EQUAL_64(0x112233, d2);
6493 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
6494 // Umaxv
6495 ASSERT_EQUAL_64(0xfe, d4);
6496 ASSERT_EQUAL_64(0xfc00, d5);
6497 ASSERT_EQUAL_64(0xaabbfc00, d6);
6498 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
6499 }
6500
6501 // Check the upper lanes above the top of the V register are all clear.
6502 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6503 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6504 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6505 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6506 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6507 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6508 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6509 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6510 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6511 }
6512 }
6513}
6514
6515typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
6516 const ZRegister& za,
6517 const ZRegister& zn,
6518 const ZRegister& zm);
6519
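// Sdot and Udot accumulate, into each S (or D) lane of zda, the dot product of
// the four corresponding B (or H) sub-lanes of zn and zm.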
6520template <typename Td, typename Ts, typename Te>
6521static void SdotUdotHelper(Test* config,
6522 SdotUdotFn macro,
6523 unsigned lane_size_in_bits,
6524 const Td& zd_inputs,
6525 const Td& za_inputs,
6526 const Ts& zn_inputs,
6527 const Ts& zm_inputs,
6528 const Te& zd_expected,
6529 const Te& zdnm_expected) {
6530 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6531 START();
6532
6533 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
6534 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
6535 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
6536 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
6537
6538 InsrHelper(&masm, zd, zd_inputs);
6539 InsrHelper(&masm, za, za_inputs);
6540 InsrHelper(&masm, zn, zn_inputs);
6541 InsrHelper(&masm, zm, zm_inputs);
6542
6543 // The Dot macro handles arbitrarily-aliased registers in the argument list.
6544 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
6545 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
6546 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
6547 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
6548 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
6549
6550 __ Mov(da_result, za);
6551 // zda = zda + (zn . zm)
6552 (masm.*macro)(da_result, da_result, zn, zm);
6553
6554 __ Mov(dn_result, zn);
6555 // zdn = za + (zdn . zm)
6556 (masm.*macro)(dn_result, za, dn_result, zm);
6557
6558 __ Mov(dm_result, zm);
6559 // zdm = za + (zn . zdm)
6560 (masm.*macro)(dm_result, za, zn, dm_result);
6561
6562 __ Mov(d_result, zd);
6563 // zd = za + (zn . zm)
6564 (masm.*macro)(d_result, za, zn, zm);
6565
6566 __ Mov(dnm_result, zn);
6567 // zdnm = za + (zdnm . zdnm)
6568 (masm.*macro)(dnm_result, za, dnm_result, dnm_result);
6569
6570 END();
6571
6572 if (CAN_RUN()) {
6573 RUN();
6574
6575 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
6576 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
6577 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
6578
6579 ASSERT_EQUAL_SVE(zd_expected, da_result);
6580 ASSERT_EQUAL_SVE(zd_expected, dn_result);
6581 ASSERT_EQUAL_SVE(zd_expected, dm_result);
6582 ASSERT_EQUAL_SVE(zd_expected, d_result);
6583
6584 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
6585 }
6586}
6587
6588TEST_SVE(sve_sdot) {
6589 int zd_inputs[] = {0x33, 0xee, 0xff};
6590 int za_inputs[] = {INT32_MAX, -3, 2};
6591 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
6592 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
6593
6594 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
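  // For example, S lane 1: -3 + (9 * -19) + (-1 * 15) + (1 * 6) + (30 * 0) = -183.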
6595 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
6596 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
6597
6598 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
6599 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
6600 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
6601
6602 SdotUdotHelper(config,
6603 &MacroAssembler::Sdot,
6604 kSRegSize,
6605 zd_inputs,
6606 za_inputs,
6607 zn_inputs,
6608 zm_inputs,
6609 zd_expected_s,
6610 zdnm_expected_s);
6611 SdotUdotHelper(config,
6612 &MacroAssembler::Sdot,
6613 kDRegSize,
6614 zd_inputs,
6615 za_inputs,
6616 zn_inputs,
6617 zm_inputs,
6618 zd_expected_d,
6619 zdnm_expected_d);
6620}
6621
6622TEST_SVE(sve_udot) {
6623 int zd_inputs[] = {0x33, 0xee, 0xff};
6624 int za_inputs[] = {INT32_MAX, -3, 2};
6625 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
6626 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
6627
6628 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
6629 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
6630 uint64_t zd_expected_d[] = {0x000000047c00ffff,
6631 0x000000000017ff49,
6632 0x00000000fff00085};
6633
6634 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
6635 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
6636 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
6637 0x00000000fffe03d4,
6638 0x00000001ffce023c};
6639
6640 SdotUdotHelper(config,
6641 &MacroAssembler::Udot,
6642 kSRegSize,
6643 zd_inputs,
6644 za_inputs,
6645 zn_inputs,
6646 zm_inputs,
6647 zd_expected_s,
6648 zdnm_expected_s);
6649 SdotUdotHelper(config,
6650 &MacroAssembler::Udot,
6651 kDRegSize,
6652 zd_inputs,
6653 za_inputs,
6654 zn_inputs,
6655 zm_inputs,
6656 zd_expected_d,
6657 zdnm_expected_d);
6658}
6659
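// Convert double-precision inputs to the raw bit pattern of the requested lane
// size, rounding to the nearest FP16 or FP32 value where necessary.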
6660template <size_t N>
6661static void FPToRawbits(const double (&inputs)[N],
6662 uint64_t* outputs,
6663 unsigned lane_size_in_bits) {
6664 for (size_t i = 0; i < N; i++) {
6665 switch (lane_size_in_bits) {
6666 case kHRegSize:
6667 outputs[i] = Float16ToRawbits(
6668 FPToFloat16(inputs[i], FPTieEven, kIgnoreDefaultNaN));
6669 break;
6670 case kSRegSize:
6671 outputs[i] =
6672 FloatToRawbits(FPToFloat(inputs[i], FPTieEven, kIgnoreDefaultNaN));
6673 break;
6674 case kDRegSize:
6675 outputs[i] = DoubleToRawbits(inputs[i]);
6676 break;
6677 default:
6678 VIXL_UNIMPLEMENTED();
6679 break;
6680 }
6681 }
6682}
6683
6684template <typename Td, size_t N>
6685static void FPArithmeticFnHelper(Test* config,
6686 ArithmeticFn macro,
6687 unsigned lane_size_in_bits,
6688 const double (&zn_inputs)[N],
6689 const double (&zm_inputs)[N],
6690 const Td& zd_expected) {
6691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6692 START();
6693
6694 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
6695 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
6696 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
6697
6698 uint64_t zn_rawbits[N];
6699 uint64_t zm_rawbits[N];
6700
6701 FPToRawbits(zn_inputs, zn_rawbits, lane_size_in_bits);
6702 FPToRawbits(zm_inputs, zm_rawbits, lane_size_in_bits);
6703
6704 InsrHelper(&masm, zn, zn_rawbits);
6705 InsrHelper(&masm, zm, zm_rawbits);
6706
6707 (masm.*macro)(zd, zn, zm);
6708
6709 END();
6710
6711 if (CAN_RUN()) {
6712 RUN();
6713
6714 ASSERT_EQUAL_SVE(zd_expected, zd);
6715 }
6716}
6717
6718TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
6719 double zn_inputs[] = {24.0,
6720 5.5,
6721 0.0,
6722 3.875,
6723 2.125,
6724 kFP64PositiveInfinity,
6725 kFP64NegativeInfinity};
6726
6727 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
6728
6729 ArithmeticFn fn = &MacroAssembler::Fadd;
6730
6731 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
6732 Float16ToRawbits(Float16(2053.5)),
6733 Float16ToRawbits(Float16(0.1)),
6734 Float16ToRawbits(Float16(-0.875)),
6735 Float16ToRawbits(Float16(14.465)),
6736 Float16ToRawbits(kFP16PositiveInfinity),
6737 Float16ToRawbits(kFP16NegativeInfinity)};
6738
6739 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
6740
6741 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
6742 FloatToRawbits(2053.5f),
6743 FloatToRawbits(0.1f),
6744 FloatToRawbits(-0.875f),
6745 FloatToRawbits(14.465f),
6746 FloatToRawbits(kFP32PositiveInfinity),
6747 FloatToRawbits(kFP32NegativeInfinity)};
6748
6749 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
6750
6751 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
6752 DoubleToRawbits(2053.5),
6753 DoubleToRawbits(0.1),
6754 DoubleToRawbits(-0.875),
6755 DoubleToRawbits(14.465),
6756 DoubleToRawbits(kFP64PositiveInfinity),
6757 DoubleToRawbits(kFP64NegativeInfinity)};
6758
6759 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
6760}
6761
6762TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
6763 double zn_inputs[] = {24.0,
6764 5.5,
6765 0.0,
6766 3.875,
6767 2.125,
6768 kFP64PositiveInfinity,
6769 kFP64NegativeInfinity};
6770
6771 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
6772
6773 ArithmeticFn fn = &MacroAssembler::Fsub;
6774
6775 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
6776 Float16ToRawbits(Float16(-2042.5)),
6777 Float16ToRawbits(Float16(-0.1)),
6778 Float16ToRawbits(Float16(8.625)),
6779 Float16ToRawbits(Float16(-10.215)),
6780 Float16ToRawbits(kFP16PositiveInfinity),
6781 Float16ToRawbits(kFP16NegativeInfinity)};
6782
6783 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
6784
6785 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
6786 FloatToRawbits(-2042.5),
6787 FloatToRawbits(-0.1),
6788 FloatToRawbits(8.625),
6789 FloatToRawbits(-10.215),
6790 FloatToRawbits(kFP32PositiveInfinity),
6791 FloatToRawbits(kFP32NegativeInfinity)};
6792
6793 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
6794
6795 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
6796 DoubleToRawbits(-2042.5),
6797 DoubleToRawbits(-0.1),
6798 DoubleToRawbits(8.625),
6799 DoubleToRawbits(-10.215),
6800 DoubleToRawbits(kFP64PositiveInfinity),
6801 DoubleToRawbits(kFP64NegativeInfinity)};
6802
6803 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
6804}
6805
6806TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
6807 double zn_inputs[] = {24.0,
6808 5.5,
6809 0.0,
6810 3.875,
6811 2.125,
6812 kFP64PositiveInfinity,
6813 kFP64NegativeInfinity};
6814
6815 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
6816
6817 ArithmeticFn fn = &MacroAssembler::Fmul;
6818
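  // Note that -inf * -13.0 = +inf, so both infinity lanes are expected to
  // produce positive infinity.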
6819 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
6820 Float16ToRawbits(Float16(11264.0)),
6821 Float16ToRawbits(Float16(0.0)),
6822 Float16ToRawbits(Float16(-18.4)),
6823 Float16ToRawbits(Float16(26.23)),
6824 Float16ToRawbits(kFP16PositiveInfinity),
6825 Float16ToRawbits(kFP16PositiveInfinity)};
6826
6827 FPArithmeticFnHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
6828
6829 uint32_t expected_s[] = {FloatToRawbits(24576.0),
6830 FloatToRawbits(11264.0),
6831 FloatToRawbits(0.0),
6832 FloatToRawbits(-18.40625),
6833 FloatToRawbits(26.2225),
6834 FloatToRawbits(kFP32PositiveInfinity),
6835 FloatToRawbits(kFP32PositiveInfinity)};
6836
6837 FPArithmeticFnHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
6838
6839 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
6840 DoubleToRawbits(11264.0),
6841 DoubleToRawbits(0.0),
6842 DoubleToRawbits(-18.40625),
6843 DoubleToRawbits(26.2225),
6844 DoubleToRawbits(kFP64PositiveInfinity),
6845 DoubleToRawbits(kFP64PositiveInfinity)};
6846
6847 FPArithmeticFnHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
6848}
6849
6850} // namespace aarch64
6851} // namespace vixl