// Copyright 2019, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// * Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// * Neither the name of ARM Limited nor the names of its contributors may be
13// used to endorse or promote products derived from this software without
14// specific prior written permission.
15//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <sys/mman.h>
28
29#include <cfloat>
30#include <cmath>
31#include <cstdio>
32#include <cstdlib>
33#include <cstring>
34
35#include "test-runner.h"
36#include "test-utils.h"
37#include "aarch64/test-utils-aarch64.h"
38
39#include "aarch64/cpu-aarch64.h"
40#include "aarch64/disasm-aarch64.h"
41#include "aarch64/macro-assembler-aarch64.h"
42#include "aarch64/simulator-aarch64.h"
43#include "test-assembler-aarch64.h"
44
45namespace vixl {
46namespace aarch64 {
47
Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
49 // We never free this memory, but we need it to live for as long as the static
50 // linked list of tests, and this is the easiest way to do it.
51 Test* test = new Test(name, fn);
52 test->set_sve_vl_in_bits(vl);
53 return test;
54}
55
56// The TEST_SVE macro works just like the usual TEST macro, but the resulting
57// function receives a `const Test& config` argument, to allow it to query the
58// vector length.
59#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
60// On the Simulator, run SVE tests with several vector lengths, including the
61// extreme values and an intermediate value that isn't a power of two.
62
63#define TEST_SVE(name) \
64 void Test##name(Test* config); \
65 Test* test_##name##_list[] = \
66 {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name), \
67 MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name), \
68 MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
69 void Test##name(Test* config)
70
71#define SVE_SETUP_WITH_FEATURES(...) \
72 SETUP_WITH_FEATURES(__VA_ARGS__); \
73 simulator.SetVectorLengthInBits(config->sve_vl_in_bits())
74
75#else
76// Otherwise, just use whatever the hardware provides.
77static const int kSVEVectorLengthInBits =
78 CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
79 ? CPU::ReadSVEVectorLengthInBits()
80 : 0;
81
82#define TEST_SVE(name) \
83 void Test##name(Test* config); \
84 Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits, \
85 "AARCH64_ASM_" #name "_vlauto", \
86 &Test##name); \
87 void Test##name(Test* config)
88
89#define SVE_SETUP_WITH_FEATURES(...) \
90 SETUP_WITH_FEATURES(__VA_ARGS__); \
91 USE(config)
92
93#endif
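
// For illustration only: a minimal sketch (not a test registered in this file)
// of how a TEST_SVE body can use its `config` argument. The test name below is
// hypothetical.
//
//   TEST_SVE(sve_example) {
//     SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
//     START();
//     // On the Simulator, the vector length is the one selected by
//     // MakeSVETest (via config->sve_vl_in_bits()); on hardware it is
//     // whatever the CPU provides.
//     __ Ptrue(p0.VnB());
//     END();
//     if (CAN_RUN()) RUN();
//   }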
94
// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
// is optimised for call-site clarity, not generated code quality, so it doesn't
// exist in the MacroAssembler itself.
98//
99// Usage:
100//
101// int values[] = { 42, 43, 44 };
102// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
103//
104// The rightmost (highest-indexed) array element maps to the lowest-numbered
105// lane.
106template <typename T, size_t N>
107void InsrHelper(MacroAssembler* masm,
108 const ZRegister& zdn,
109 const T (&values)[N]) {
110 for (size_t i = 0; i < N; i++) {
111 masm->Insr(zdn, values[i]);
112 }
113}
114
// Conveniently initialise P registers with scalar bit patterns. The destination
// lane size is ignored. This is optimised for call-site clarity, not generated
// code quality.
//
// Usage:
//
//   Initialise(&masm, p0, 0x1234);  // Sets p0 = 0b'0001'0010'0011'0100
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value3,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  // Generate a literal pool, as in the array form.
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  Label data;
  Label done;

  masm->Adr(temp, &data);
  masm->Ldr(pd, SVEMemOperand(temp));
  masm->B(&done);
  {
    ExactAssemblyScope total(masm, kPRegMaxSizeInBytes);
    masm->bind(&data);
    masm->dc64(value0);
    masm->dc64(value1);
    masm->dc64(value2);
    masm->dc64(value3);
  }
  masm->Bind(&done);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value2,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, value2, value1, value0);
}
void Initialise(MacroAssembler* masm,
                const PRegister& pd,
                uint64_t value1,
                uint64_t value0) {
  Initialise(masm, pd, 0, 0, value1, value0);
}
void Initialise(MacroAssembler* masm, const PRegister& pd, uint64_t value0) {
  Initialise(masm, pd, 0, 0, 0, value0);
}
163
164// Conveniently initialise P registers by lane. This is optimised for call-site
165// clarity, not generated code quality.
166//
167// Usage:
168//
169// int values[] = { 0x0, 0x1, 0x2 };
170// Initialise(&masm, p0.VnS(), values); // Sets p0 = 0b'0000'0001'0010
171//
172// The rightmost (highest-indexed) array element maps to the lowest-numbered
173// lane. Unspecified lanes are set to 0 (inactive).
174//
// Each element of the `values` array is mapped onto a lane in `pd`. The
// architecture only respects the lowest bit of each lane, and writes zero to
// the upper bits, but other (encodable) values can be specified if required by
// the test.
178template <typename T, size_t N>
179void Initialise(MacroAssembler* masm,
180 const PRegisterWithLaneSize& pd,
181 const T (&values)[N]) {
182 // Turn the array into 64-bit chunks.
183 uint64_t chunks[4] = {0, 0, 0, 0};
184 VIXL_STATIC_ASSERT(sizeof(chunks) == kPRegMaxSizeInBytes);
185
186 int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
187 VIXL_ASSERT((64 % p_bits_per_lane) == 0);
188 VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize);
189
190 uint64_t p_lane_mask = GetUintMask(p_bits_per_lane);
191
192 VIXL_STATIC_ASSERT(N <= kPRegMaxSize);
193 size_t bit = 0;
194 for (int n = static_cast<int>(N - 1); n >= 0; n--) {
195 VIXL_ASSERT(bit < (sizeof(chunks) * kBitsPerByte));
196 uint64_t value = values[n] & p_lane_mask;
197 chunks[bit / 64] |= value << (bit % 64);
198 bit += p_bits_per_lane;
199 }
200
201 Initialise(masm, pd, chunks[3], chunks[2], chunks[1], chunks[0]);
202}
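
// For example (a sketch of the packing above, using S-sized lanes, which get
// four predicate bits per lane):
//
//   int values[] = {0xc, 0x7, 0x9};
//   Initialise(&masm, p0.VnS(), values);
//   // Lane 0 (the rightmost element) is 0x9, lane 1 is 0x7 and lane 2 is 0xc,
//   // so chunks[0] == 0xc79 and p0 = 0b...1100'0111'1001.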

// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_z) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
208
  __ Mov(x0, 0x0123456789abcdef);
210
211 // Test basic `Insr` behaviour.
212 __ Insr(z0.VnB(), 1);
213 __ Insr(z0.VnB(), 2);
214 __ Insr(z0.VnB(), x0);
215 __ Insr(z0.VnB(), -42);
216 __ Insr(z0.VnB(), 0);
217
218 // Test array inputs.
219 int z1_inputs[] = {3, 4, 5, -42, 0};
220 InsrHelper(&masm, z1.VnH(), z1_inputs);
221
222 // Test that sign-extension works as intended for various lane sizes.
223 __ Dup(z2.VnD(), 0); // Clear the register first.
224 __ Insr(z2.VnB(), -42); // 0xd6
225 __ Insr(z2.VnB(), 0xfe); // 0xfe
226 __ Insr(z2.VnH(), -42); // 0xffd6
227 __ Insr(z2.VnH(), 0xfedc); // 0xfedc
228 __ Insr(z2.VnS(), -42); // 0xffffffd6
229 __ Insr(z2.VnS(), 0xfedcba98); // 0xfedcba98
230 // Use another register for VnD(), so we can support 128-bit Z registers.
231 __ Insr(z3.VnD(), -42); // 0xffffffffffffffd6
232 __ Insr(z3.VnD(), 0xfedcba9876543210); // 0xfedcba9876543210
233
  END();

  if (CAN_RUN()) {
    RUN();

    // Test that array checks work properly on a register initialised
    // lane-by-lane.
    int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00};
    ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB());

    // Test that lane-by-lane checks work properly on a register initialised
    // by array.
    for (size_t i = 0; i < ArrayLength(z1_inputs); i++) {
      // The rightmost (highest-indexed) array element maps to the
      // lowest-numbered lane.
      int lane = static_cast<int>(ArrayLength(z1_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z1.VnH(), lane);
    }

    uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98};
    ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD());
    uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210};
    ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD());
  }
}
259
// Ensure that basic test infrastructure works.
TEST_SVE(sve_test_infrastructure_p) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // Simple cases: move boolean (0 or 1) values.

  int p0_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
  Initialise(&masm, p0.VnB(), p0_inputs);

  int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1};
  Initialise(&masm, p1.VnH(), p1_inputs);

  int p2_inputs[] = {1, 1, 0, 1};
  Initialise(&masm, p2.VnS(), p2_inputs);
275
276 int p3_inputs[] = {0, 1};
277 Initialise(&masm, p3.VnD(), p3_inputs);
278
  // Advanced cases: move numeric values into architecturally-ignored bits.
280
281 // B-sized lanes get one bit in a P register, so there are no ignored bits.
282
283 // H-sized lanes get two bits in a P register.
284 int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3};
285 Initialise(&masm, p4.VnH(), p4_inputs);
286
287 // S-sized lanes get four bits in a P register.
288 int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf};
289 Initialise(&masm, p5.VnS(), p5_inputs);
290
291 // D-sized lanes get eight bits in a P register.
292 int p6_inputs[] = {0x81, 0xcc, 0x55};
293 Initialise(&masm, p6.VnD(), p6_inputs);
294
295 // The largest possible P register has 32 bytes.
296 int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
297 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
298 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
299 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f};
300 Initialise(&masm, p7.VnD(), p7_inputs);
301
302 END();
303
304 if (CAN_RUN()) {
    RUN();

    // Test that lane-by-lane checks work properly. The rightmost
    // (highest-indexed) array element maps to the lowest-numbered lane.
    for (size_t i = 0; i < ArrayLength(p0_inputs); i++) {
      int lane = static_cast<int>(ArrayLength(p0_inputs) - i - 1);
      ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), lane);
    }
    for (size_t i = 0; i < ArrayLength(p1_inputs); i++) {
314 int lane = static_cast<int>(ArrayLength(p1_inputs) - i - 1);
315 ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnH(), lane);
316 }
317 for (size_t i = 0; i < ArrayLength(p2_inputs); i++) {
318 int lane = static_cast<int>(ArrayLength(p2_inputs) - i - 1);
319 ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnS(), lane);
320 }
321 for (size_t i = 0; i < ArrayLength(p3_inputs); i++) {
322 int lane = static_cast<int>(ArrayLength(p3_inputs) - i - 1);
323 ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnD(), lane);
324 }
325
326 // Test that array checks work properly on predicates initialised with a
327 // possibly-different lane size.
328 // 0b...11'10'01'00'01'10'11
329 int p4_expected[] = {0x39, 0x1b};
330 ASSERT_EQUAL_SVE(p4_expected, p4.VnD());
331
332 ASSERT_EQUAL_SVE(p5_inputs, p5.VnS());
333
334 // 0b...10000001'11001100'01010101
335 int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1};
336 ASSERT_EQUAL_SVE(p6_expected, p6.VnH());
337
338 // 0b...10011100'10011101'10011110'10011111
339 int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,
340 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1};
341 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
  }
343}
344
// Test that writes to V registers clear the high bits of the corresponding Z
// register.
TEST_SVE(sve_v_write_clear) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                          CPUFeatures::kFP,
                          CPUFeatures::kSVE);
  START();
352
  // The Simulator has two mechanisms for writing V registers:
354 // - Write*Register, calling through to SimRegisterBase::Write.
355 // - LogicVRegister::ClearForWrite followed by one or more lane updates.
356 // Try to cover both variants.
357
358 // Prepare some known inputs.
359 uint8_t data[kQRegSizeInBytes];
360 for (size_t i = 0; i < kQRegSizeInBytes; i++) {
361 data[i] = 42 + i;
362 }
363 __ Mov(x10, reinterpret_cast<uintptr_t>(data));
364 __ Fmov(d30, 42.0);
365
  // Use Index to label the lane indices, so failures are easy to detect and
  // diagnose.
368 __ Index(z0.VnB(), 0, 1);
369 __ Index(z1.VnB(), 0, 1);
370 __ Index(z2.VnB(), 0, 1);
371 __ Index(z3.VnB(), 0, 1);
372 __ Index(z4.VnB(), 0, 1);
373
374 __ Index(z10.VnB(), 0, -1);
375 __ Index(z11.VnB(), 0, -1);
376 __ Index(z12.VnB(), 0, -1);
377 __ Index(z13.VnB(), 0, -1);
378 __ Index(z14.VnB(), 0, -1);
379
380 // Instructions using Write*Register (and SimRegisterBase::Write).
381 __ Ldr(b0, MemOperand(x10));
382 __ Fcvt(h1, d30);
383 __ Fmov(s2, 1.5f);
384 __ Fmov(d3, d30);
385 __ Ldr(q4, MemOperand(x10));
386
387 // Instructions using LogicVRegister::ClearForWrite.
388 // These also (incidentally) test that across-lane instructions correctly
389 // ignore the high-order Z register lanes.
390 __ Sminv(b10, v10.V16B());
391 __ Addv(h11, v11.V4H());
392 __ Saddlv(s12, v12.V8H());
393 __ Dup(v13.V8B(), b13, kDRegSizeInBytes);
394 __ Uaddl(v14.V8H(), v14.V8B(), v14.V8B());
395
396 END();
397
398 if (CAN_RUN()) {
399 RUN();
400
401 // Check the Q part first.
402 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000002a, v0);
403 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005140, v1); // 42.0 (f16)
404 ASSERT_EQUAL_128(0x0000000000000000, 0x000000003fc00000, v2); // 1.5 (f32)
405 ASSERT_EQUAL_128(0x0000000000000000, 0x4045000000000000, v3); // 42.0 (f64)
406 ASSERT_EQUAL_128(0x3938373635343332, 0x31302f2e2d2c2b2a, v4);
407 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000f1, v10); // -15
408 // 0xf9fa + 0xfbfc + 0xfdfe + 0xff00 -> 0xf2f4
409 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000f2f4, v11);
410 // 0xfffff1f2 + 0xfffff3f4 + ... + 0xfffffdfe + 0xffffff00 -> 0xffffc6c8
411 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffc6c8, v12);
412 ASSERT_EQUAL_128(0x0000000000000000, 0xf8f8f8f8f8f8f8f8, v13); // [-8] x 8
413 // [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
414 // + [0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 0x0000]
415 // -> [0x01f2, 0x01f4, 0x01f6, 0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0000]
416 ASSERT_EQUAL_128(0x01f201f401f601f8, 0x01fa01fc01fe0000, v14);
417
418 // Check that the upper lanes are all clear.
419 for (int i = kQRegSizeInBytes; i < core.GetSVELaneCount(kBRegSize); i++) {
420 ASSERT_EQUAL_SVE_LANE(0x00, z0.VnB(), i);
421 ASSERT_EQUAL_SVE_LANE(0x00, z1.VnB(), i);
422 ASSERT_EQUAL_SVE_LANE(0x00, z2.VnB(), i);
423 ASSERT_EQUAL_SVE_LANE(0x00, z3.VnB(), i);
424 ASSERT_EQUAL_SVE_LANE(0x00, z4.VnB(), i);
425 ASSERT_EQUAL_SVE_LANE(0x00, z10.VnB(), i);
426 ASSERT_EQUAL_SVE_LANE(0x00, z11.VnB(), i);
427 ASSERT_EQUAL_SVE_LANE(0x00, z12.VnB(), i);
428 ASSERT_EQUAL_SVE_LANE(0x00, z13.VnB(), i);
429 ASSERT_EQUAL_SVE_LANE(0x00, z14.VnB(), i);
430 }
431 }
432}
433
static void MlaMlsHelper(Test* config, unsigned lane_size_in_bits) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  int zd_inputs[] = {0xbb, 0xcc, 0xdd, 0xee};
  int za_inputs[] = {-39, 1, -3, 2};
440 int zn_inputs[] = {-5, -20, 9, 8};
441 int zm_inputs[] = {9, -5, 4, 5};
442
443 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
444 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
445 ZRegister zn = z2.WithLaneSize(lane_size_in_bits);
446 ZRegister zm = z3.WithLaneSize(lane_size_in_bits);
447
448 // TODO: Use a simple `Dup` once it accepts arbitrary immediates.
  InsrHelper(&masm, zd, zd_inputs);
  InsrHelper(&masm, za, za_inputs);
451 InsrHelper(&masm, zn, zn_inputs);
452 InsrHelper(&masm, zm, zm_inputs);
453
454 int p0_inputs[] = {1, 1, 0, 1};
455 int p1_inputs[] = {1, 0, 1, 1};
456 int p2_inputs[] = {0, 1, 1, 1};
  int p3_inputs[] = {1, 1, 1, 0};

  Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), p0_inputs);
460 Initialise(&masm, p1.WithLaneSize(lane_size_in_bits), p1_inputs);
461 Initialise(&masm, p2.WithLaneSize(lane_size_in_bits), p2_inputs);
462 Initialise(&masm, p3.WithLaneSize(lane_size_in_bits), p3_inputs);
463
464 // The Mla macro automatically selects between mla, mad and movprfx + mla
465 // based on what registers are aliased.
466 ZRegister mla_da_result = z10.WithLaneSize(lane_size_in_bits);
467 ZRegister mla_dn_result = z11.WithLaneSize(lane_size_in_bits);
468 ZRegister mla_dm_result = z12.WithLaneSize(lane_size_in_bits);
  ZRegister mla_d_result = z13.WithLaneSize(lane_size_in_bits);

  __ Mov(mla_da_result, za);
472 __ Mla(mla_da_result, p0.Merging(), mla_da_result, zn, zm);
473
474 __ Mov(mla_dn_result, zn);
475 __ Mla(mla_dn_result, p1.Merging(), za, mla_dn_result, zm);
476
477 __ Mov(mla_dm_result, zm);
478 __ Mla(mla_dm_result, p2.Merging(), za, zn, mla_dm_result);
479
  __ Mov(mla_d_result, zd);
  __ Mla(mla_d_result, p3.Merging(), za, zn, zm);

  // The Mls macro automatically selects between mls, msb and movprfx + mls
484 // based on what registers are aliased.
485 ZRegister mls_da_result = z20.WithLaneSize(lane_size_in_bits);
486 ZRegister mls_dn_result = z21.WithLaneSize(lane_size_in_bits);
487 ZRegister mls_dm_result = z22.WithLaneSize(lane_size_in_bits);
  ZRegister mls_d_result = z23.WithLaneSize(lane_size_in_bits);

  __ Mov(mls_da_result, za);
491 __ Mls(mls_da_result, p0.Merging(), mls_da_result, zn, zm);
492
493 __ Mov(mls_dn_result, zn);
494 __ Mls(mls_dn_result, p1.Merging(), za, mls_dn_result, zm);
495
496 __ Mov(mls_dm_result, zm);
497 __ Mls(mls_dm_result, p2.Merging(), za, zn, mls_dm_result);
498
  __ Mov(mls_d_result, zd);
  __ Mls(mls_d_result, p3.Merging(), za, zn, zm);

  END();
503
504 if (CAN_RUN()) {
505 RUN();
506
507 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
508 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits));
509 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits));
510
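    // Expected accumulations: mla[i] = za[i] + (zn[i] * zm[i]), and
    // mls[i] = za[i] - (zn[i] * zm[i]).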
511 int mla[] = {-84, 101, 33, 42};
512 int mls[] = {6, -99, -39, -38};
513
514 int mla_da_expected[] = {mla[0], mla[1], za_inputs[2], mla[3]};
515 ASSERT_EQUAL_SVE(mla_da_expected, mla_da_result);
516
517 int mla_dn_expected[] = {mla[0], zn_inputs[1], mla[2], mla[3]};
518 ASSERT_EQUAL_SVE(mla_dn_expected, mla_dn_result);
519
520 int mla_dm_expected[] = {zm_inputs[0], mla[1], mla[2], mla[3]};
521 ASSERT_EQUAL_SVE(mla_dm_expected, mla_dm_result);
522
    int mla_d_expected[] = {mla[0], mla[1], mla[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mla_d_expected, mla_d_result);

    int mls_da_expected[] = {mls[0], mls[1], za_inputs[2], mls[3]};
527 ASSERT_EQUAL_SVE(mls_da_expected, mls_da_result);
528
529 int mls_dn_expected[] = {mls[0], zn_inputs[1], mls[2], mls[3]};
530 ASSERT_EQUAL_SVE(mls_dn_expected, mls_dn_result);
531
532 int mls_dm_expected[] = {zm_inputs[0], mls[1], mls[2], mls[3]};
533 ASSERT_EQUAL_SVE(mls_dm_expected, mls_dm_result);
534
    int mls_d_expected[] = {mls[0], mls[1], mls[2], zd_inputs[3]};
    ASSERT_EQUAL_SVE(mls_d_expected, mls_d_result);
  }
}
539
TEST_SVE(sve_mla_mls_b) { MlaMlsHelper(config, kBRegSize); }
TEST_SVE(sve_mla_mls_h) { MlaMlsHelper(config, kHRegSize); }
TEST_SVE(sve_mla_mls_s) { MlaMlsHelper(config, kSRegSize); }
TEST_SVE(sve_mla_mls_d) { MlaMlsHelper(config, kDRegSize); }

TEST_SVE(sve_bitwise_unpredicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
548
549 uint64_t z8_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
550 InsrHelper(&masm, z8.VnD(), z8_inputs);
551 uint64_t z15_inputs[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff};
552 InsrHelper(&masm, z15.VnD(), z15_inputs);
553
554 __ And(z1.VnD(), z8.VnD(), z15.VnD());
555 __ Bic(z2.VnD(), z8.VnD(), z15.VnD());
556 __ Eor(z3.VnD(), z8.VnD(), z15.VnD());
557 __ Orr(z4.VnD(), z8.VnD(), z15.VnD());
558
559 END();
560
561 if (CAN_RUN()) {
562 RUN();
563 uint64_t z1_expected[] = {0xfedcaa8854540000, 0x0000454588aacdef};
564 uint64_t z2_expected[] = {0x0000101022003210, 0x0123002201010000};
565 uint64_t z3_expected[] = {0x01235476ab89fedc, 0xcdef98ba67453210};
566 uint64_t z4_expected[] = {0xfffffefeffddfedc, 0xcdefddffefefffff};
567
568 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
569 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
570 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
571 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
572 }
}
574
TEST_SVE(sve_predicate_logical) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
578
579 // 0b...01011010'10110111
580 int p10_inputs[] = {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1}; // Pm
581 // 0b...11011001'01010010
582 int p11_inputs[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0}; // Pn
583 // 0b...01010101'10110010
584 int p12_inputs[] = {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0}; // pg
585
586 Initialise(&masm, p10.VnB(), p10_inputs);
587 Initialise(&masm, p11.VnB(), p11_inputs);
588 Initialise(&masm, p12.VnB(), p12_inputs);
589
590 __ Ands(p0.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
591 __ Mrs(x0, NZCV);
592 __ Bics(p1.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
593 __ Mrs(x1, NZCV);
594 __ Eor(p2.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
595 __ Nand(p3.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
596 __ Nor(p4.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
597 __ Orn(p5.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
598 __ Orr(p6.VnB(), p12.Zeroing(), p11.VnB(), p10.VnB());
599 __ Sel(p7.VnB(), p12, p11.VnB(), p10.VnB());
600
601 END();
602
603 if (CAN_RUN()) {
604 RUN();
605
606 // 0b...01010000'00010010
607 int p0_expected[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0};
608 // 0b...00000001'00000000
609 int p1_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0};
610 // 0b...00000001'10100000
611 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
612 // 0b...00000101'10100000
613 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0};
614 // 0b...00000100'00000000
615 int p4_expected[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
616 // 0b...01010101'00010010
617 int p5_expected[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0};
618 // 0b...01010001'10110010
619 int p6_expected[] = {0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};
620 // 0b...01011011'00010111
621 int p7_expected[] = {0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1};
622
623 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
624 ASSERT_EQUAL_SVE(p1_expected, p1.VnB());
625 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
626 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
627 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
628 ASSERT_EQUAL_SVE(p5_expected, p5.VnB());
629 ASSERT_EQUAL_SVE(p6_expected, p6.VnB());
630 ASSERT_EQUAL_SVE(p7_expected, p7.VnB());
631
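    // For SVE flag-setting predicate operations: N (SVEFirstFlag) is set when
    // the first active lane of the result is true, Z (SVENoneFlag) when no
    // active lane is true, and C (SVENotLastFlag) when the last active lane is
    // false.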
    ASSERT_EQUAL_32(SVEFirstFlag, w0);
    ASSERT_EQUAL_32(SVENotLastFlag, w1);
  }
}

TEST_SVE(sve_int_compare_vectors) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
640
641 int z10_inputs[] = {0x00, 0x80, 0xff, 0x7f, 0x00, 0x00, 0x00, 0xff};
642 int z11_inputs[] = {0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x7f, 0xfe};
643 int p0_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
644 InsrHelper(&masm, z10.VnB(), z10_inputs);
645 InsrHelper(&masm, z11.VnB(), z11_inputs);
646 Initialise(&masm, p0.VnB(), p0_inputs);
647
648 __ Cmphs(p6.VnB(), p0.Zeroing(), z10.VnB(), z11.VnB());
649 __ Mrs(x6, NZCV);
650
651 uint64_t z12_inputs[] = {0xffffffffffffffff, 0x8000000000000000};
652 uint64_t z13_inputs[] = {0x0000000000000000, 0x8000000000000000};
653 int p1_inputs[] = {1, 1};
654 InsrHelper(&masm, z12.VnD(), z12_inputs);
655 InsrHelper(&masm, z13.VnD(), z13_inputs);
656 Initialise(&masm, p1.VnD(), p1_inputs);
657
658 __ Cmphi(p7.VnD(), p1.Zeroing(), z12.VnD(), z13.VnD());
659 __ Mrs(x7, NZCV);
660
661 int z14_inputs[] = {0, 32767, -1, -32767, 0, 0, 0, 32766};
662 int z15_inputs[] = {0, 0, 0, 0, 32767, -1, -32767, 32767};
663
664 int p2_inputs[] = {1, 0, 1, 1, 1, 1, 1, 1};
665 InsrHelper(&masm, z14.VnH(), z14_inputs);
666 InsrHelper(&masm, z15.VnH(), z15_inputs);
667 Initialise(&masm, p2.VnH(), p2_inputs);
668
669 __ Cmpge(p8.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
670 __ Mrs(x8, NZCV);
671
672 __ Cmpeq(p9.VnH(), p2.Zeroing(), z14.VnH(), z15.VnH());
673 __ Mrs(x9, NZCV);
674
675 int z16_inputs[] = {0, -1, 0, 0};
676 int z17_inputs[] = {0, 0, 2147483647, -2147483648};
677 int p3_inputs[] = {1, 1, 1, 1};
678 InsrHelper(&masm, z16.VnS(), z16_inputs);
679 InsrHelper(&masm, z17.VnS(), z17_inputs);
680 Initialise(&masm, p3.VnS(), p3_inputs);
681
682 __ Cmpgt(p10.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
683 __ Mrs(x10, NZCV);
684
685 __ Cmpne(p11.VnS(), p3.Zeroing(), z16.VnS(), z17.VnS());
686 __ Mrs(x11, NZCV);
687
688 // Architectural aliases testing.
689 __ Cmpls(p12.VnB(), p0.Zeroing(), z11.VnB(), z10.VnB()); // HS
690 __ Cmplo(p13.VnD(), p1.Zeroing(), z13.VnD(), z12.VnD()); // HI
691 __ Cmple(p14.VnH(), p2.Zeroing(), z15.VnH(), z14.VnH()); // GE
692 __ Cmplt(p15.VnS(), p3.Zeroing(), z17.VnS(), z16.VnS()); // GT
693
694 END();
695
696 if (CAN_RUN()) {
697 RUN();
698
699 int p6_expected[] = {1, 0, 1, 1, 0, 0, 0, 1};
700 for (size_t i = 0; i < ArrayLength(p6_expected); i++) {
701 int lane = static_cast<int>(ArrayLength(p6_expected) - i - 1);
702 ASSERT_EQUAL_SVE_LANE(p6_expected[i], p6.VnB(), lane);
703 }
704
705 int p7_expected[] = {1, 0};
706 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
707
708 int p8_expected[] = {1, 0, 0, 0, 0, 1, 1, 0};
709 ASSERT_EQUAL_SVE(p8_expected, p8.VnH());
710
711 int p9_expected[] = {1, 0, 0, 0, 0, 0, 0, 0};
712 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
713
714 int p10_expected[] = {0, 0, 0, 1};
715 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
716
717 int p11_expected[] = {0, 1, 1, 1};
718 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
719
720 // Reuse the expected results to verify the architectural aliases.
721 ASSERT_EQUAL_SVE(p6_expected, p12.VnB());
722 ASSERT_EQUAL_SVE(p7_expected, p13.VnD());
723 ASSERT_EQUAL_SVE(p8_expected, p14.VnH());
724 ASSERT_EQUAL_SVE(p10_expected, p15.VnS());
725
726 ASSERT_EQUAL_32(SVEFirstFlag, w6);
727 ASSERT_EQUAL_32(NoFlag, w7);
728 ASSERT_EQUAL_32(NoFlag, w8);
729 ASSERT_EQUAL_32(NoFlag, w9);
730 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
731 }
732}
733
TEST_SVE(sve_int_compare_vectors_wide_elements) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
737
738 int src1_inputs_1[] = {0, 1, -1, -128, 127, 100, -66};
739 int src2_inputs_1[] = {0, -1};
740 int mask_inputs_1[] = {1, 1, 1, 1, 1, 0, 1};
741 InsrHelper(&masm, z13.VnB(), src1_inputs_1);
742 InsrHelper(&masm, z19.VnD(), src2_inputs_1);
743 Initialise(&masm, p0.VnB(), mask_inputs_1);
744
745 __ Cmpge(p2.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
746 __ Mrs(x2, NZCV);
747 __ Cmpgt(p3.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
748 __ Mrs(x3, NZCV);
749
750 int src1_inputs_2[] = {0, 32767, -1, -32767, 1, 1234, 0, 32766};
751 int src2_inputs_2[] = {0, -32767};
752 int mask_inputs_2[] = {1, 0, 1, 1, 1, 1, 1, 1};
753 InsrHelper(&masm, z13.VnH(), src1_inputs_2);
754 InsrHelper(&masm, z19.VnD(), src2_inputs_2);
755 Initialise(&masm, p0.VnH(), mask_inputs_2);
756
757 __ Cmple(p4.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
758 __ Mrs(x4, NZCV);
759 __ Cmplt(p5.VnH(), p0.Zeroing(), z13.VnH(), z19.VnD());
760 __ Mrs(x5, NZCV);
761
762 int src1_inputs_3[] = {0, -1, 2147483647, -2147483648};
763 int src2_inputs_3[] = {0, -2147483648};
764 int mask_inputs_3[] = {1, 1, 1, 1};
765 InsrHelper(&masm, z13.VnS(), src1_inputs_3);
766 InsrHelper(&masm, z19.VnD(), src2_inputs_3);
767 Initialise(&masm, p0.VnS(), mask_inputs_3);
768
769 __ Cmpeq(p6.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
770 __ Mrs(x6, NZCV);
771 __ Cmpne(p7.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
772 __ Mrs(x7, NZCV);
773
774 int src1_inputs_4[] = {0x00, 0x80, 0x7f, 0xff, 0x7f, 0xf0, 0x0f, 0x55};
775 int src2_inputs_4[] = {0x00, 0x7f};
776 int mask_inputs_4[] = {1, 1, 1, 1, 0, 1, 1, 1};
777 InsrHelper(&masm, z13.VnB(), src1_inputs_4);
778 InsrHelper(&masm, z19.VnD(), src2_inputs_4);
779 Initialise(&masm, p0.VnB(), mask_inputs_4);
780
781 __ Cmplo(p8.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
782 __ Mrs(x8, NZCV);
783 __ Cmpls(p9.VnB(), p0.Zeroing(), z13.VnB(), z19.VnD());
784 __ Mrs(x9, NZCV);
785
786 int src1_inputs_5[] = {0x0000, 0x8000, 0x7fff, 0xffff};
787 int src2_inputs_5[] = {0x8000, 0xffff};
788 int mask_inputs_5[] = {1, 1, 1, 1};
789 InsrHelper(&masm, z13.VnS(), src1_inputs_5);
790 InsrHelper(&masm, z19.VnD(), src2_inputs_5);
791 Initialise(&masm, p0.VnS(), mask_inputs_5);
792
793 __ Cmphi(p10.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
794 __ Mrs(x10, NZCV);
795 __ Cmphs(p11.VnS(), p0.Zeroing(), z13.VnS(), z19.VnD());
796 __ Mrs(x11, NZCV);
797
798 END();
799
800 if (CAN_RUN()) {
801 RUN();
802 int p2_expected[] = {1, 1, 1, 0, 1, 0, 0};
803 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
804
805 int p3_expected[] = {1, 1, 0, 0, 1, 0, 0};
806 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
807
808 int p4_expected[] = {0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
809 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
810
811 int p5_expected[] = {0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0};
812 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
813
814 int p6_expected[] = {0x1, 0x0, 0x0, 0x1};
815 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
816
817 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
818 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
819
820 int p8_expected[] = {1, 0, 0, 0, 0, 0, 1, 1};
821 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
822
823 int p9_expected[] = {1, 0, 1, 0, 0, 0, 1, 1};
824 ASSERT_EQUAL_SVE(p9_expected, p9.VnB());
825
826 int p10_expected[] = {0x0, 0x0, 0x0, 0x0};
827 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
828
829 int p11_expected[] = {0x0, 0x1, 0x0, 0x1};
830 ASSERT_EQUAL_SVE(p11_expected, p11.VnS());
831
832 ASSERT_EQUAL_32(NoFlag, w2);
833 ASSERT_EQUAL_32(NoFlag, w3);
834 ASSERT_EQUAL_32(NoFlag, w4);
835 ASSERT_EQUAL_32(SVENotLastFlag, w5);
836 ASSERT_EQUAL_32(SVEFirstFlag, w6);
837 ASSERT_EQUAL_32(SVENotLastFlag, w7);
838 ASSERT_EQUAL_32(SVEFirstFlag, w8);
839 ASSERT_EQUAL_32(SVEFirstFlag, w9);
840 ASSERT_EQUAL_32(SVENotLastFlag | SVENoneFlag, w10);
841 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w11);
  }
}

TEST_SVE(sve_bitwise_imm) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
848
849 // clang-format off
850 uint64_t z21_inputs[] = {0xfedcba9876543210, 0x0123456789abcdef};
851 uint32_t z22_inputs[] = {0xfedcba98, 0x76543210, 0x01234567, 0x89abcdef};
852 uint16_t z23_inputs[] = {0xfedc, 0xba98, 0x7654, 0x3210,
853 0x0123, 0x4567, 0x89ab, 0xcdef};
854 uint8_t z24_inputs[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
855 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
856 // clang-format on
857
858 InsrHelper(&masm, z1.VnD(), z21_inputs);
859 InsrHelper(&masm, z2.VnS(), z22_inputs);
860 InsrHelper(&masm, z3.VnH(), z23_inputs);
861 InsrHelper(&masm, z4.VnB(), z24_inputs);
862
863 __ And(z1.VnD(), z1.VnD(), 0x0000ffff0000ffff);
864 __ And(z2.VnS(), z2.VnS(), 0xff0000ff);
865 __ And(z3.VnH(), z3.VnH(), 0x0ff0);
866 __ And(z4.VnB(), z4.VnB(), 0x3f);
867
868 InsrHelper(&masm, z5.VnD(), z21_inputs);
869 InsrHelper(&masm, z6.VnS(), z22_inputs);
870 InsrHelper(&masm, z7.VnH(), z23_inputs);
871 InsrHelper(&masm, z8.VnB(), z24_inputs);
872
873 __ Eor(z5.VnD(), z5.VnD(), 0x0000ffff0000ffff);
874 __ Eor(z6.VnS(), z6.VnS(), 0xff0000ff);
875 __ Eor(z7.VnH(), z7.VnH(), 0x0ff0);
876 __ Eor(z8.VnB(), z8.VnB(), 0x3f);
877
878 InsrHelper(&masm, z9.VnD(), z21_inputs);
879 InsrHelper(&masm, z10.VnS(), z22_inputs);
880 InsrHelper(&masm, z11.VnH(), z23_inputs);
881 InsrHelper(&masm, z12.VnB(), z24_inputs);
882
883 __ Orr(z9.VnD(), z9.VnD(), 0x0000ffff0000ffff);
884 __ Orr(z10.VnS(), z10.VnS(), 0xff0000ff);
885 __ Orr(z11.VnH(), z11.VnH(), 0x0ff0);
886 __ Orr(z12.VnB(), z12.VnB(), 0x3f);
887
  {
889 // The `Dup` macro maps onto either `dup` or `dupm`, but has its own test,
890 // so here we test `dupm` directly.
891 ExactAssemblyScope guard(&masm, 4 * kInstructionSize);
892 __ dupm(z13.VnD(), 0x7ffffff800000000);
893 __ dupm(z14.VnS(), 0x7ffc7ffc);
894 __ dupm(z15.VnH(), 0x3ffc);
895 __ dupm(z16.VnB(), 0xc3);
896 }
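
  // Note: `dupm` encodes its operand as a bitmask immediate (a rotated,
  // replicated run of set bits), which is why patterns like 0x7ffffff800000000
  // and 0xc3 are used above; arbitrary constants have to go through the `Dup`
  // macro, which can synthesise unencodable immediates.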

  END();
899
900 if (CAN_RUN()) {
901 RUN();
902
903 // clang-format off
904 uint64_t z1_expected[] = {0x0000ba9800003210, 0x000045670000cdef};
905 uint32_t z2_expected[] = {0xfe000098, 0x76000010, 0x01000067, 0x890000ef};
906 uint16_t z3_expected[] = {0x0ed0, 0x0a90, 0x0650, 0x0210,
907 0x0120, 0x0560, 0x09a0, 0x0de0};
908 uint8_t z4_expected[] = {0x3e, 0x1c, 0x3a, 0x18, 0x36, 0x14, 0x32, 0x10,
909 0x01, 0x23, 0x05, 0x27, 0x09, 0x2b, 0x0d, 0x2f};
910
911 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
912 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
913 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
914 ASSERT_EQUAL_SVE(z4_expected, z4.VnB());
915
916 uint64_t z5_expected[] = {0xfedc45677654cdef, 0x0123ba9889ab3210};
917 uint32_t z6_expected[] = {0x01dcba67, 0x895432ef, 0xfe234598, 0x76abcd10};
918 uint16_t z7_expected[] = {0xf12c, 0xb568, 0x79a4, 0x3de0,
919 0x0ed3, 0x4a97, 0x865b, 0xc21f};
920 uint8_t z8_expected[] = {0xc1, 0xe3, 0x85, 0xa7, 0x49, 0x6b, 0x0d, 0x2f,
921 0x3e, 0x1c, 0x7a, 0x58, 0xb6, 0x94, 0xf2, 0xd0};
922
923 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
924 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
925 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
926 ASSERT_EQUAL_SVE(z8_expected, z8.VnB());
927
928 uint64_t z9_expected[] = {0xfedcffff7654ffff, 0x0123ffff89abffff};
929 uint32_t z10_expected[] = {0xffdcbaff, 0xff5432ff, 0xff2345ff, 0xffabcdff};
930 uint16_t z11_expected[] = {0xfffc, 0xbff8, 0x7ff4, 0x3ff0,
931 0x0ff3, 0x4ff7, 0x8ffb, 0xcfff};
932 uint8_t z12_expected[] = {0xff, 0xff, 0xbf, 0xbf, 0x7f, 0x7f, 0x3f, 0x3f,
933 0x3f, 0x3f, 0x7f, 0x7f, 0xbf, 0xbf, 0xff, 0xff};
934
935 ASSERT_EQUAL_SVE(z9_expected, z9.VnD());
936 ASSERT_EQUAL_SVE(z10_expected, z10.VnS());
937 ASSERT_EQUAL_SVE(z11_expected, z11.VnH());
938 ASSERT_EQUAL_SVE(z12_expected, z12.VnB());
939
940 uint64_t z13_expected[] = {0x7ffffff800000000, 0x7ffffff800000000};
941 uint32_t z14_expected[] = {0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc, 0x7ffc7ffc};
942 uint16_t z15_expected[] = {0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc,
943 0x3ffc, 0x3ffc, 0x3ffc ,0x3ffc};
944 ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
945 ASSERT_EQUAL_SVE(z14_expected, z14.VnS());
946 ASSERT_EQUAL_SVE(z15_expected, z15.VnH());
947 // clang-format on
948 }
}

TEST_SVE(sve_dup_imm) {
  // The `Dup` macro can generate `dup`, `dupm`, and it can synthesise
  // unencodable immediates.

  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
957
958 // Encodable with `dup` (shift 0).
959 __ Dup(z0.VnD(), -1);
960 __ Dup(z1.VnS(), 0x7f);
961 __ Dup(z2.VnH(), -0x80);
962 __ Dup(z3.VnB(), 42);
963
964 // Encodable with `dup` (shift 8).
  __ Dup(z4.VnD(), -42 * 256);
  __ Dup(z5.VnS(), -0x8000);
  __ Dup(z6.VnH(), 0x7f00);
  // B-sized lanes cannot take a shift of 8.
969
970 // Encodable with `dupm` (but not `dup`).
971 __ Dup(z10.VnD(), 0x3fc);
972 __ Dup(z11.VnS(), -516097); // 0xfff81fff, as a signed int.
973 __ Dup(z12.VnH(), 0x0001);
974 // All values that fit B-sized lanes are encodable with `dup`.
975
976 // Cases that require immediate synthesis.
977 __ Dup(z20.VnD(), 0x1234);
978 __ Dup(z21.VnD(), -4242);
979 __ Dup(z22.VnD(), 0xfedcba9876543210);
980 __ Dup(z23.VnS(), 0x01020304);
981 __ Dup(z24.VnS(), -0x01020304);
982 __ Dup(z25.VnH(), 0x3c38);
983 // All values that fit B-sized lanes are directly encodable.
984
985 END();
986
987 if (CAN_RUN()) {
988 RUN();
989
990 ASSERT_EQUAL_SVE(0xffffffffffffffff, z0.VnD());
991 ASSERT_EQUAL_SVE(0x0000007f, z1.VnS());
992 ASSERT_EQUAL_SVE(0xff80, z2.VnH());
993 ASSERT_EQUAL_SVE(0x2a, z3.VnB());
994
    ASSERT_EQUAL_SVE(0xffffffffffffd600, z4.VnD());
    ASSERT_EQUAL_SVE(0xffff8000, z5.VnS());
    ASSERT_EQUAL_SVE(0x7f00, z6.VnH());

    ASSERT_EQUAL_SVE(0x00000000000003fc, z10.VnD());
1000 ASSERT_EQUAL_SVE(0xfff81fff, z11.VnS());
1001 ASSERT_EQUAL_SVE(0x0001, z12.VnH());
1002
1003 ASSERT_EQUAL_SVE(0x1234, z20.VnD());
1004 ASSERT_EQUAL_SVE(0xffffffffffffef6e, z21.VnD());
1005 ASSERT_EQUAL_SVE(0xfedcba9876543210, z22.VnD());
1006 ASSERT_EQUAL_SVE(0x01020304, z23.VnS());
1007 ASSERT_EQUAL_SVE(0xfefdfcfc, z24.VnS());
1008 ASSERT_EQUAL_SVE(0x3c38, z25.VnH());
1009 }
1010}
1011
TEST_SVE(sve_inc_dec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
1015
1016 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1017 Initialise(&masm, p0.VnB(), p0_inputs);
1018
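  // p0_inputs has nine active B lanes; the active bits that also fall on H, S
  // and D lane boundaries give the counts below.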
1019 int p0_b_count = 9;
1020 int p0_h_count = 5;
1021 int p0_s_count = 3;
1022 int p0_d_count = 2;
1023
1024 // 64-bit operations preserve their high bits.
1025 __ Mov(x0, 0x123456780000002a);
1026 __ Decp(x0, p0.VnB());
1027
1028 __ Mov(x1, 0x123456780000002a);
1029 __ Incp(x1, p0.VnH());
1030
1031 // Check that saturation does not occur.
1032 __ Mov(x10, 1);
1033 __ Decp(x10, p0.VnS());
1034
1035 __ Mov(x11, UINT64_MAX);
1036 __ Incp(x11, p0.VnD());
1037
1038 __ Mov(x12, INT64_MAX);
1039 __ Incp(x12, p0.VnB());
1040
1041 // With an all-true predicate, these instructions increment or decrement by
1042 // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
1046 __ Decp(x20, p15.VnB());
1047
1048 __ Mov(x21, 0x4000000000000000);
1049 __ Incp(x21, p15.VnH());
1050
1051 END();
1052 if (CAN_RUN()) {
1053 RUN();
1054
1055 ASSERT_EQUAL_64(0x123456780000002a - p0_b_count, x0);
1056 ASSERT_EQUAL_64(0x123456780000002a + p0_h_count, x1);
1057
1058 ASSERT_EQUAL_64(UINT64_C(1) - p0_s_count, x10);
1059 ASSERT_EQUAL_64(UINT64_MAX + p0_d_count, x11);
1060 ASSERT_EQUAL_64(static_cast<uint64_t>(INT64_MAX) + p0_b_count, x12);
1061
1062 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1063 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1064 }
1065}
1066
TEST_SVE(sve_sqinc_sqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
1070
1071 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1072 Initialise(&masm, p0.VnB(), p0_inputs);
1073
1074 int p0_b_count = 9;
1075 int p0_h_count = 5;
1076 int p0_s_count = 3;
1077 int p0_d_count = 2;
1078
1079 uint64_t dummy_high = 0x1234567800000000;
1080
1081 // 64-bit operations preserve their high bits.
1082 __ Mov(x0, dummy_high + 42);
1083 __ Sqdecp(x0, p0.VnB());
1084
1085 __ Mov(x1, dummy_high + 42);
1086 __ Sqincp(x1, p0.VnH());
1087
1088 // 32-bit operations sign-extend into their high bits.
1089 __ Mov(x2, dummy_high + 42);
1090 __ Sqdecp(x2, p0.VnS(), w2);
1091
1092 __ Mov(x3, dummy_high + 42);
1093 __ Sqincp(x3, p0.VnD(), w3);
1094
1095 __ Mov(x4, dummy_high + 1);
1096 __ Sqdecp(x4, p0.VnS(), w4);
1097
1098 __ Mov(x5, dummy_high - 1);
1099 __ Sqincp(x5, p0.VnD(), w5);
1100
1101 // Check that saturation behaves correctly.
1102 __ Mov(x10, 0x8000000000000001); // INT64_MIN + 1
1103 __ Sqdecp(x10, p0.VnB(), x10);
1104
1105 __ Mov(x11, dummy_high + 0x80000001); // INT32_MIN + 1
1106 __ Sqdecp(x11, p0.VnH(), w11);
1107
1108 __ Mov(x12, 1);
1109 __ Sqdecp(x12, p0.VnS(), x12);
1110
1111 __ Mov(x13, dummy_high + 1);
1112 __ Sqdecp(x13, p0.VnD(), w13);
1113
1114 __ Mov(x14, 0x7ffffffffffffffe); // INT64_MAX - 1
1115 __ Sqincp(x14, p0.VnB(), x14);
1116
1117 __ Mov(x15, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1118 __ Sqincp(x15, p0.VnH(), w15);
1119
1120 // Don't use x16 and x17 since they are scratch registers by default.
1121
1122 __ Mov(x18, 0xffffffffffffffff);
1123 __ Sqincp(x18, p0.VnS(), x18);
1124
1125 __ Mov(x19, dummy_high + 0xffffffff);
1126 __ Sqincp(x19, p0.VnD(), w19);
1127
1128 __ Mov(x20, dummy_high + 0xffffffff);
1129 __ Sqdecp(x20, p0.VnB(), w20);
1130
1131 // With an all-true predicate, these instructions increment or decrement by
1132 // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x21, 0);
1136 __ Sqdecp(x21, p15.VnB(), x21);
1137
1138 __ Mov(x22, 0);
1139 __ Sqincp(x22, p15.VnH(), x22);
1140
1141 __ Mov(x23, dummy_high);
1142 __ Sqdecp(x23, p15.VnS(), w23);
1143
1144 __ Mov(x24, dummy_high);
1145 __ Sqincp(x24, p15.VnD(), w24);
1146
1147 END();
1148 if (CAN_RUN()) {
1149 RUN();
1150
1151 // 64-bit operations preserve their high bits.
1152 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1153 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1154
1155 // 32-bit operations sign-extend into their high bits.
1156 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1157 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1158 ASSERT_EQUAL_64(0xffffffff00000000 | (1 - p0_s_count), x4);
1159 ASSERT_EQUAL_64(p0_d_count - 1, x5);
1160
1161 // Check that saturation behaves correctly.
1162 ASSERT_EQUAL_64(INT64_MIN, x10);
1163 ASSERT_EQUAL_64(INT32_MIN, x11);
1164 ASSERT_EQUAL_64(1 - p0_s_count, x12);
1165 ASSERT_EQUAL_64(1 - p0_d_count, x13);
1166 ASSERT_EQUAL_64(INT64_MAX, x14);
1167 ASSERT_EQUAL_64(INT32_MAX, x15);
1168 ASSERT_EQUAL_64(p0_s_count - 1, x18);
1169 ASSERT_EQUAL_64(p0_d_count - 1, x19);
1170 ASSERT_EQUAL_64(-1 - p0_b_count, x20);
1171
1172 // Check all-true predicates.
1173 ASSERT_EQUAL_64(-core.GetSVELaneCount(kBRegSize), x21);
1174 ASSERT_EQUAL_64(core.GetSVELaneCount(kHRegSize), x22);
1175 ASSERT_EQUAL_64(-core.GetSVELaneCount(kSRegSize), x23);
1176 ASSERT_EQUAL_64(core.GetSVELaneCount(kDRegSize), x24);
1177 }
1178}
1179
TEST_SVE(sve_uqinc_uqdec_p_scalar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
1183
1184 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1185 Initialise(&masm, p0.VnB(), p0_inputs);
1186
1187 int p0_b_count = 9;
1188 int p0_h_count = 5;
1189 int p0_s_count = 3;
1190 int p0_d_count = 2;
1191
1192 uint64_t dummy_high = 0x1234567800000000;
1193
1194 // 64-bit operations preserve their high bits.
1195 __ Mov(x0, dummy_high + 42);
1196 __ Uqdecp(x0, p0.VnB());
1197
1198 __ Mov(x1, dummy_high + 42);
1199 __ Uqincp(x1, p0.VnH());
1200
1201 // 32-bit operations zero-extend into their high bits.
1202 __ Mov(x2, dummy_high + 42);
1203 __ Uqdecp(x2, p0.VnS(), w2);
1204
1205 __ Mov(x3, dummy_high + 42);
1206 __ Uqincp(x3, p0.VnD(), w3);
1207
1208 __ Mov(x4, dummy_high + 0x80000001);
1209 __ Uqdecp(x4, p0.VnS(), w4);
1210
1211 __ Mov(x5, dummy_high + 0x7fffffff);
1212 __ Uqincp(x5, p0.VnD(), w5);
1213
1214 // Check that saturation behaves correctly.
1215 __ Mov(x10, 1);
1216 __ Uqdecp(x10, p0.VnB(), x10);
1217
1218 __ Mov(x11, dummy_high + 1);
1219 __ Uqdecp(x11, p0.VnH(), w11);
1220
1221 __ Mov(x12, 0x8000000000000000); // INT64_MAX + 1
1222 __ Uqdecp(x12, p0.VnS(), x12);
1223
1224 __ Mov(x13, dummy_high + 0x80000000); // INT32_MAX + 1
1225 __ Uqdecp(x13, p0.VnD(), w13);
1226
1227 __ Mov(x14, 0xfffffffffffffffe); // UINT64_MAX - 1
1228 __ Uqincp(x14, p0.VnB(), x14);
1229
1230 __ Mov(x15, dummy_high + 0xfffffffe); // UINT32_MAX - 1
1231 __ Uqincp(x15, p0.VnH(), w15);
1232
1233 // Don't use x16 and x17 since they are scratch registers by default.
1234
1235 __ Mov(x18, 0x7ffffffffffffffe); // INT64_MAX - 1
1236 __ Uqincp(x18, p0.VnS(), x18);
1237
1238 __ Mov(x19, dummy_high + 0x7ffffffe); // INT32_MAX - 1
1239 __ Uqincp(x19, p0.VnD(), w19);
1240
1241 // With an all-true predicate, these instructions increment or decrement by
1242 // the vector length.
  __ Ptrue(p15.VnB());

  __ Mov(x20, 0x4000000000000000);
1246 __ Uqdecp(x20, p15.VnB(), x20);
1247
1248 __ Mov(x21, 0x4000000000000000);
1249 __ Uqincp(x21, p15.VnH(), x21);
1250
1251 __ Mov(x22, dummy_high + 0x40000000);
1252 __ Uqdecp(x22, p15.VnS(), w22);
1253
1254 __ Mov(x23, dummy_high + 0x40000000);
1255 __ Uqincp(x23, p15.VnD(), w23);
1256
1257 END();
1258 if (CAN_RUN()) {
1259 RUN();
1260
1261 // 64-bit operations preserve their high bits.
1262 ASSERT_EQUAL_64(dummy_high + 42 - p0_b_count, x0);
1263 ASSERT_EQUAL_64(dummy_high + 42 + p0_h_count, x1);
1264
1265 // 32-bit operations zero-extend into their high bits.
1266 ASSERT_EQUAL_64(42 - p0_s_count, x2);
1267 ASSERT_EQUAL_64(42 + p0_d_count, x3);
1268 ASSERT_EQUAL_64(UINT64_C(0x80000001) - p0_s_count, x4);
1269 ASSERT_EQUAL_64(UINT64_C(0x7fffffff) + p0_d_count, x5);
1270
1271 // Check that saturation behaves correctly.
1272 ASSERT_EQUAL_64(0, x10);
1273 ASSERT_EQUAL_64(0, x11);
1274 ASSERT_EQUAL_64(0x8000000000000000 - p0_s_count, x12);
1275 ASSERT_EQUAL_64(UINT64_C(0x80000000) - p0_d_count, x13);
1276 ASSERT_EQUAL_64(UINT64_MAX, x14);
1277 ASSERT_EQUAL_64(UINT32_MAX, x15);
1278 ASSERT_EQUAL_64(0x7ffffffffffffffe + p0_s_count, x18);
1279 ASSERT_EQUAL_64(UINT64_C(0x7ffffffe) + p0_d_count, x19);
1280
1281 // Check all-true predicates.
1282 ASSERT_EQUAL_64(0x4000000000000000 - core.GetSVELaneCount(kBRegSize), x20);
1283 ASSERT_EQUAL_64(0x4000000000000000 + core.GetSVELaneCount(kHRegSize), x21);
1284 ASSERT_EQUAL_64(0x40000000 - core.GetSVELaneCount(kSRegSize), x22);
1285 ASSERT_EQUAL_64(0x40000000 + core.GetSVELaneCount(kDRegSize), x23);
1286 }
1287}
1288
TEST_SVE(sve_inc_dec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
1292
1293 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1294 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1295 Initialise(&masm, p0.VnB(), p0_inputs);
1296
1297 // Check that saturation does not occur.
1298
1299 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1300 InsrHelper(&masm, z0.VnD(), z0_inputs);
1301
1302 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1303 InsrHelper(&masm, z1.VnD(), z1_inputs);
1304
1305 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1306 InsrHelper(&masm, z2.VnS(), z2_inputs);
1307
1308 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1309 InsrHelper(&masm, z3.VnH(), z3_inputs);
1310
1311 // The MacroAssembler implements non-destructive operations using movprfx.
1312 __ Decp(z10.VnD(), p0, z0.VnD());
1313 __ Decp(z11.VnD(), p0, z1.VnD());
1314 __ Decp(z12.VnS(), p0, z2.VnS());
1315 __ Decp(z13.VnH(), p0, z3.VnH());
1316
1317 __ Incp(z14.VnD(), p0, z0.VnD());
1318 __ Incp(z15.VnD(), p0, z1.VnD());
1319 __ Incp(z16.VnS(), p0, z2.VnS());
1320 __ Incp(z17.VnH(), p0, z3.VnH());
1321
1322 // Also test destructive forms.
1323 __ Mov(z4, z0);
1324 __ Mov(z5, z1);
1325 __ Mov(z6, z2);
1326 __ Mov(z7, z3);
1327
1328 __ Decp(z0.VnD(), p0);
1329 __ Decp(z1.VnD(), p0);
1330 __ Decp(z2.VnS(), p0);
1331 __ Decp(z3.VnH(), p0);
1332
1333 __ Incp(z4.VnD(), p0);
1334 __ Incp(z5.VnD(), p0);
1335 __ Incp(z6.VnS(), p0);
1336 __ Incp(z7.VnH(), p0);
1337
1338 END();
1339 if (CAN_RUN()) {
1340 RUN();
1341
1342 // z0_inputs[...] - number of active D lanes (2)
1343 int64_t z0_expected[] = {0x1234567800000040, -2, -1, 0x7ffffffffffffffe};
1344 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1345
1346 // z1_inputs[...] - number of active D lanes (2)
1347 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1348 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1349
1350 // z2_inputs[...] - number of active S lanes (3)
1351 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, 0x7ffffffd};
1352 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1353
1354 // z3_inputs[...] - number of active H lanes (5)
1355 int16_t z3_expected[] = {0x1225, -5, -4, -6, 0x7ffb, 0x7ffa};
1356 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1357
1358 // z0_inputs[...] + number of active D lanes (2)
1359 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1360 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1361
1362 // z1_inputs[...] + number of active D lanes (2)
1363 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, 0x8000000000000001};
1364 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1365
1366 // z2_inputs[...] + number of active S lanes (3)
1367 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, 0x80000002, 0x80000003};
1368 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1369
1370 // z3_inputs[...] + number of active H lanes (5)
1371 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, 0x8004};
1372 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1373
1374 // Check that the non-destructive macros produced the same results.
1375 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1376 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1377 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1378 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1379 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1380 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1381 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1382 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1383 }
1384}
1385
TEST_SVE(sve_inc_dec_ptrue_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();

  // With an all-true predicate, these instructions increment or decrement by
  // the vector length.
  __ Ptrue(p15.VnB());

  __ Dup(z0.VnD(), 0);
1395 __ Decp(z0.VnD(), p15);
1396
1397 __ Dup(z1.VnS(), 0);
1398 __ Decp(z1.VnS(), p15);
1399
1400 __ Dup(z2.VnH(), 0);
1401 __ Decp(z2.VnH(), p15);
1402
1403 __ Dup(z3.VnD(), 0);
1404 __ Incp(z3.VnD(), p15);
1405
1406 __ Dup(z4.VnS(), 0);
1407 __ Incp(z4.VnS(), p15);
1408
1409 __ Dup(z5.VnH(), 0);
1410 __ Incp(z5.VnH(), p15);
1411
1412 END();
1413 if (CAN_RUN()) {
1414 RUN();
1415
1416 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1417 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1418 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1419
1420 for (int i = 0; i < d_lane_count; i++) {
1421 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1422 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1423 }
1424
1425 for (int i = 0; i < s_lane_count; i++) {
1426 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1427 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1428 }
1429
1430 for (int i = 0; i < h_lane_count; i++) {
1431 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1432 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1433 }
1434 }
1435}
1436
TEST_SVE(sve_sqinc_sqdec_p_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
  START();
1440
1441 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1442 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1443 Initialise(&masm, p0.VnB(), p0_inputs);
1444
1445 // Check that saturation behaves correctly.
1446
1447 int64_t z0_inputs[] = {0x1234567800000042, 0, 1, INT64_MIN};
1448 InsrHelper(&masm, z0.VnD(), z0_inputs);
1449
1450 int64_t z1_inputs[] = {0x12345678ffffff2a, 0, -1, INT64_MAX};
1451 InsrHelper(&masm, z1.VnD(), z1_inputs);
1452
1453 int32_t z2_inputs[] = {0x12340042, 0, -1, 1, INT32_MAX, INT32_MIN};
1454 InsrHelper(&masm, z2.VnS(), z2_inputs);
1455
1456 int16_t z3_inputs[] = {0x122a, 0, 1, -1, INT16_MIN, INT16_MAX};
1457 InsrHelper(&masm, z3.VnH(), z3_inputs);
1458
1459 // The MacroAssembler implements non-destructive operations using movprfx.
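  // For example, `Sqdecp(z10.VnD(), p0, z0.VnD())` is expected to expand to
  // something like:
  //   movprfx z10, z0
  //   sqdecp z10.d, p0
  // (a sketch only; the exact sequence is up to the MacroAssembler).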
1460 __ Sqdecp(z10.VnD(), p0, z0.VnD());
1461 __ Sqdecp(z11.VnD(), p0, z1.VnD());
1462 __ Sqdecp(z12.VnS(), p0, z2.VnS());
1463 __ Sqdecp(z13.VnH(), p0, z3.VnH());
1464
1465 __ Sqincp(z14.VnD(), p0, z0.VnD());
1466 __ Sqincp(z15.VnD(), p0, z1.VnD());
1467 __ Sqincp(z16.VnS(), p0, z2.VnS());
1468 __ Sqincp(z17.VnH(), p0, z3.VnH());
1469
1470 // Also test destructive forms.
1471 __ Mov(z4, z0);
1472 __ Mov(z5, z1);
1473 __ Mov(z6, z2);
1474 __ Mov(z7, z3);
1475
1476 __ Sqdecp(z0.VnD(), p0);
1477 __ Sqdecp(z1.VnD(), p0);
1478 __ Sqdecp(z2.VnS(), p0);
1479 __ Sqdecp(z3.VnH(), p0);
1480
1481 __ Sqincp(z4.VnD(), p0);
1482 __ Sqincp(z5.VnD(), p0);
1483 __ Sqincp(z6.VnS(), p0);
1484 __ Sqincp(z7.VnH(), p0);
1485
1486 END();
1487 if (CAN_RUN()) {
1488 RUN();
1489
1490 // z0_inputs[...] - number of active D lanes (2)
1491 int64_t z0_expected[] = {0x1234567800000040, -2, -1, INT64_MIN};
1492 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1493
1494 // z1_inputs[...] - number of active D lanes (2)
1495 int64_t z1_expected[] = {0x12345678ffffff28, -2, -3, 0x7ffffffffffffffd};
1496 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1497
1498 // z2_inputs[...] - number of active S lanes (3)
1499 int32_t z2_expected[] = {0x1234003f, -3, -4, -2, 0x7ffffffc, INT32_MIN};
1500 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1501
1502 // z3_inputs[...] - number of active H lanes (5)
1503 int16_t z3_expected[] = {0x1225, -5, -4, -6, INT16_MIN, 0x7ffa};
1504 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1505
1506 // z0_inputs[...] + number of active D lanes (2)
1507 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1508 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1509
1510 // z1_inputs[...] + number of active D lanes (2)
1511 uint64_t z5_expected[] = {0x12345678ffffff2c, 2, 1, INT64_MAX};
1512 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1513
1514 // z2_inputs[...] + number of active S lanes (3)
1515 uint32_t z6_expected[] = {0x12340045, 3, 2, 4, INT32_MAX, 0x80000003};
1516 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1517
1518 // z3_inputs[...] + number of active H lanes (5)
1519 uint16_t z7_expected[] = {0x122f, 5, 6, 4, 0x8005, INT16_MAX};
1520 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1521
1522 // Check that the non-destructive macros produced the same results.
1523 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1524 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1525 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1526 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1527 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1528 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1529 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1530 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1531 }
1532}
1533
Jacob Bramleye8289202019-07-31 11:25:23 +01001534TEST_SVE(sve_sqinc_sqdec_ptrue_vector) {
1535 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001536 START();
1537
1538 // With an all-true predicate, these instructions increment or decrement by
1539 // the vector length.
Jacob Bramley0ce75842019-07-17 18:12:50 +01001540 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001541
1542 __ Dup(z0.VnD(), 0);
1543 __ Sqdecp(z0.VnD(), p15);
1544
1545 __ Dup(z1.VnS(), 0);
1546 __ Sqdecp(z1.VnS(), p15);
1547
1548 __ Dup(z2.VnH(), 0);
1549 __ Sqdecp(z2.VnH(), p15);
1550
1551 __ Dup(z3.VnD(), 0);
1552 __ Sqincp(z3.VnD(), p15);
1553
1554 __ Dup(z4.VnS(), 0);
1555 __ Sqincp(z4.VnS(), p15);
1556
1557 __ Dup(z5.VnH(), 0);
1558 __ Sqincp(z5.VnH(), p15);
1559
1560 END();
1561 if (CAN_RUN()) {
1562 RUN();
1563
1564 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1565 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1566 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1567
1568 for (int i = 0; i < d_lane_count; i++) {
1569 ASSERT_EQUAL_SVE_LANE(-d_lane_count, z0.VnD(), i);
1570 ASSERT_EQUAL_SVE_LANE(d_lane_count, z3.VnD(), i);
1571 }
1572
1573 for (int i = 0; i < s_lane_count; i++) {
1574 ASSERT_EQUAL_SVE_LANE(-s_lane_count, z1.VnS(), i);
1575 ASSERT_EQUAL_SVE_LANE(s_lane_count, z4.VnS(), i);
1576 }
1577
1578 for (int i = 0; i < h_lane_count; i++) {
1579 ASSERT_EQUAL_SVE_LANE(-h_lane_count, z2.VnH(), i);
1580 ASSERT_EQUAL_SVE_LANE(h_lane_count, z5.VnH(), i);
1581 }
1582 }
1583}
1584
Jacob Bramleye8289202019-07-31 11:25:23 +01001585TEST_SVE(sve_uqinc_uqdec_p_vector) {
1586 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001587 START();
1588
1589 // There are {5, 3, 2} active {H, S, D} lanes. B-sized lanes are ignored.
1590 int p0_inputs[] = {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
1591 Initialise(&masm, p0.VnB(), p0_inputs);
1592
1593 // Check that saturation behaves correctly.
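  // A sketch of the unsigned saturation exercised here, given the two active D
  // lanes from p0: Uqdecp clamps 0 - 2 and 1 - 2 to 0 instead of wrapping, and
  // Uqincp clamps UINT64_MAX + 2 to UINT64_MAX.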
1594
1595 uint64_t z0_inputs[] = {0x1234567800000042, 0, 1, 0x8000000000000000};
1596 InsrHelper(&masm, z0.VnD(), z0_inputs);
1597
1598 uint64_t z1_inputs[] = {0x12345678ffffff2a, 0, UINT64_MAX, INT64_MAX};
1599 InsrHelper(&masm, z1.VnD(), z1_inputs);
1600
1601 uint32_t z2_inputs[] = {0x12340042, 0, UINT32_MAX, 1, INT32_MAX, 0x80000000};
1602 InsrHelper(&masm, z2.VnS(), z2_inputs);
1603
1604 uint16_t z3_inputs[] = {0x122a, 0, 1, UINT16_MAX, 0x8000, INT16_MAX};
1605 InsrHelper(&masm, z3.VnH(), z3_inputs);
1606
1607 // The MacroAssembler implements non-destructive operations using movprfx.
1608 __ Uqdecp(z10.VnD(), p0, z0.VnD());
1609 __ Uqdecp(z11.VnD(), p0, z1.VnD());
1610 __ Uqdecp(z12.VnS(), p0, z2.VnS());
1611 __ Uqdecp(z13.VnH(), p0, z3.VnH());
1612
1613 __ Uqincp(z14.VnD(), p0, z0.VnD());
1614 __ Uqincp(z15.VnD(), p0, z1.VnD());
1615 __ Uqincp(z16.VnS(), p0, z2.VnS());
1616 __ Uqincp(z17.VnH(), p0, z3.VnH());
1617
1618 // Also test destructive forms.
1619 __ Mov(z4, z0);
1620 __ Mov(z5, z1);
1621 __ Mov(z6, z2);
1622 __ Mov(z7, z3);
1623
1624 __ Uqdecp(z0.VnD(), p0);
1625 __ Uqdecp(z1.VnD(), p0);
1626 __ Uqdecp(z2.VnS(), p0);
1627 __ Uqdecp(z3.VnH(), p0);
1628
1629 __ Uqincp(z4.VnD(), p0);
1630 __ Uqincp(z5.VnD(), p0);
1631 __ Uqincp(z6.VnS(), p0);
1632 __ Uqincp(z7.VnH(), p0);
1633
1634 END();
1635 if (CAN_RUN()) {
1636 RUN();
1637
1638 // z0_inputs[...] - number of active D lanes (2)
1639 uint64_t z0_expected[] = {0x1234567800000040, 0, 0, 0x7ffffffffffffffe};
1640 ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
1641
1642 // z1_inputs[...] - number of active D lanes (2)
1643 uint64_t z1_expected[] = {0x12345678ffffff28,
1644 0,
1645 0xfffffffffffffffd,
1646 0x7ffffffffffffffd};
1647 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
1648
1649 // z2_inputs[...] - number of active S lanes (3)
1650 uint32_t z2_expected[] =
1651 {0x1234003f, 0, 0xfffffffc, 0, 0x7ffffffc, 0x7ffffffd};
1652 ASSERT_EQUAL_SVE(z2_expected, z2.VnS());
1653
1654 // z3_inputs[...] - number of active H lanes (5)
1655 uint16_t z3_expected[] = {0x1225, 0, 0, 0xfffa, 0x7ffb, 0x7ffa};
1656 ASSERT_EQUAL_SVE(z3_expected, z3.VnH());
1657
1658 // z0_inputs[...] + number of active D lanes (2)
1659 uint64_t z4_expected[] = {0x1234567800000044, 2, 3, 0x8000000000000002};
1660 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
1661
1662 // z1_inputs[...] + number of active D lanes (2)
1663 uint64_t z5_expected[] = {0x12345678ffffff2c,
1664 2,
1665 UINT64_MAX,
1666 0x8000000000000001};
1667 ASSERT_EQUAL_SVE(z5_expected, z5.VnD());
1668
1669 // z2_inputs[...] + number of active S lanes (3)
1670 uint32_t z6_expected[] =
1671 {0x12340045, 3, UINT32_MAX, 4, 0x80000002, 0x80000003};
1672 ASSERT_EQUAL_SVE(z6_expected, z6.VnS());
1673
1674 // z3_inputs[...] + number of active H lanes (5)
1675 uint16_t z7_expected[] = {0x122f, 5, 6, UINT16_MAX, 0x8005, 0x8004};
1676 ASSERT_EQUAL_SVE(z7_expected, z7.VnH());
1677
1678 // Check that the non-destructive macros produced the same results.
1679 ASSERT_EQUAL_SVE(z0_expected, z10.VnD());
1680 ASSERT_EQUAL_SVE(z1_expected, z11.VnD());
1681 ASSERT_EQUAL_SVE(z2_expected, z12.VnS());
1682 ASSERT_EQUAL_SVE(z3_expected, z13.VnH());
1683 ASSERT_EQUAL_SVE(z4_expected, z14.VnD());
1684 ASSERT_EQUAL_SVE(z5_expected, z15.VnD());
1685 ASSERT_EQUAL_SVE(z6_expected, z16.VnS());
1686 ASSERT_EQUAL_SVE(z7_expected, z17.VnH());
1687 }
1688}
1689
Jacob Bramleye8289202019-07-31 11:25:23 +01001690TEST_SVE(sve_uqinc_uqdec_ptrue_vector) {
1691 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001692 START();
1693
1694 // With an all-true predicate, these instructions increment or decrement by
1695 // the vector length.
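  // For example, at a 384-bit VL (one of the simulated configurations) there
  // are 24 H lanes, so the 0x1200 lanes below become 0x11e8 after Uqdecp and
  // 0x1218 after Uqincp; the checks use GetSVELaneCount so they hold for any VL.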
Jacob Bramley0ce75842019-07-17 18:12:50 +01001696 __ Ptrue(p15.VnB());
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001697
1698 __ Mov(x0, 0x1234567800000000);
1699 __ Mov(x1, 0x12340000);
1700 __ Mov(x2, 0x1200);
1701
1702 __ Dup(z0.VnD(), x0);
1703 __ Uqdecp(z0.VnD(), p15);
1704
1705 __ Dup(z1.VnS(), x1);
1706 __ Uqdecp(z1.VnS(), p15);
1707
1708 __ Dup(z2.VnH(), x2);
1709 __ Uqdecp(z2.VnH(), p15);
1710
1711 __ Dup(z3.VnD(), x0);
1712 __ Uqincp(z3.VnD(), p15);
1713
1714 __ Dup(z4.VnS(), x1);
1715 __ Uqincp(z4.VnS(), p15);
1716
1717 __ Dup(z5.VnH(), x2);
1718 __ Uqincp(z5.VnH(), p15);
1719
1720 END();
1721 if (CAN_RUN()) {
1722 RUN();
1723
1724 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1725 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1726 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1727
1728 for (int i = 0; i < d_lane_count; i++) {
1729 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 - d_lane_count, z0.VnD(), i);
1730 ASSERT_EQUAL_SVE_LANE(0x1234567800000000 + d_lane_count, z3.VnD(), i);
1731 }
1732
1733 for (int i = 0; i < s_lane_count; i++) {
1734 ASSERT_EQUAL_SVE_LANE(0x12340000 - s_lane_count, z1.VnS(), i);
1735 ASSERT_EQUAL_SVE_LANE(0x12340000 + s_lane_count, z4.VnS(), i);
1736 }
1737
1738 for (int i = 0; i < h_lane_count; i++) {
1739 ASSERT_EQUAL_SVE_LANE(0x1200 - h_lane_count, z2.VnH(), i);
1740 ASSERT_EQUAL_SVE_LANE(0x1200 + h_lane_count, z5.VnH(), i);
1741 }
1742 }
1743}
1744
Jacob Bramleye8289202019-07-31 11:25:23 +01001745TEST_SVE(sve_index) {
1746 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleycd8148c2019-07-11 18:43:20 +01001747 START();
1748
1749 // Simple cases.
1750 __ Index(z0.VnB(), 0, 1);
1751 __ Index(z1.VnH(), 1, 1);
1752 __ Index(z2.VnS(), 2, 1);
1753 __ Index(z3.VnD(), 3, 1);
1754
1755 // Synthesised immediates.
1756 __ Index(z4.VnB(), 42, -1);
1757 __ Index(z5.VnH(), -1, 42);
1758 __ Index(z6.VnS(), 42, 42);
1759
1760 // Register arguments.
1761 __ Mov(x0, 42);
1762 __ Mov(x1, -3);
1763 __ Index(z10.VnD(), x0, x1);
1764 __ Index(z11.VnB(), w0, w1);
1765 // The register size should correspond to the lane size, but VIXL allows any
1766 // register at least as big as the lane size.
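  // For example, `Index(z12.VnB(), x0, x1)` behaves exactly like the
  // w-register form above, since only the bits that fit in a B-sized lane can
  // affect the result.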
1767 __ Index(z12.VnB(), x0, x1);
1768 __ Index(z13.VnH(), w0, x1);
1769 __ Index(z14.VnS(), x0, w1);
1770
1771 // Integer overflow.
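  // For example, `Index(z20.VnB(), UINT8_MAX - 2, 2)` produces the lane values
  // 0xfd, 0xff, 0x01, 0x03, 0x05, ...: each result is truncated to the lane
  // width, so overflow simply wraps.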
1772 __ Index(z20.VnB(), UINT8_MAX - 2, 2);
1773 __ Index(z21.VnH(), 7, -3);
1774 __ Index(z22.VnS(), INT32_MAX - 2, 1);
1775 __ Index(z23.VnD(), INT64_MIN + 6, -7);
1776
1777 END();
1778
1779 if (CAN_RUN()) {
1780 RUN();
1781
1782 int b_lane_count = core.GetSVELaneCount(kBRegSize);
1783 int h_lane_count = core.GetSVELaneCount(kHRegSize);
1784 int s_lane_count = core.GetSVELaneCount(kSRegSize);
1785 int d_lane_count = core.GetSVELaneCount(kDRegSize);
1786
1787 uint64_t b_mask = GetUintMask(kBRegSize);
1788 uint64_t h_mask = GetUintMask(kHRegSize);
1789 uint64_t s_mask = GetUintMask(kSRegSize);
1790 uint64_t d_mask = GetUintMask(kDRegSize);
1791
1792 // Simple cases.
1793 for (int i = 0; i < b_lane_count; i++) {
1794 ASSERT_EQUAL_SVE_LANE((0 + i) & b_mask, z0.VnB(), i);
1795 }
1796 for (int i = 0; i < h_lane_count; i++) {
1797 ASSERT_EQUAL_SVE_LANE((1 + i) & h_mask, z1.VnH(), i);
1798 }
1799 for (int i = 0; i < s_lane_count; i++) {
1800 ASSERT_EQUAL_SVE_LANE((2 + i) & s_mask, z2.VnS(), i);
1801 }
1802 for (int i = 0; i < d_lane_count; i++) {
1803 ASSERT_EQUAL_SVE_LANE((3 + i) & d_mask, z3.VnD(), i);
1804 }
1805
1806 // Synthesised immediates.
1807 for (int i = 0; i < b_lane_count; i++) {
1808 ASSERT_EQUAL_SVE_LANE((42 - i) & b_mask, z4.VnB(), i);
1809 }
1810 for (int i = 0; i < h_lane_count; i++) {
1811 ASSERT_EQUAL_SVE_LANE((-1 + (42 * i)) & h_mask, z5.VnH(), i);
1812 }
1813 for (int i = 0; i < s_lane_count; i++) {
1814 ASSERT_EQUAL_SVE_LANE((42 + (42 * i)) & s_mask, z6.VnS(), i);
1815 }
1816
1817 // Register arguments.
1818 for (int i = 0; i < d_lane_count; i++) {
1819 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & d_mask, z10.VnD(), i);
1820 }
1821 for (int i = 0; i < b_lane_count; i++) {
1822 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z11.VnB(), i);
1823 }
1824 for (int i = 0; i < b_lane_count; i++) {
1825 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & b_mask, z12.VnB(), i);
1826 }
1827 for (int i = 0; i < h_lane_count; i++) {
1828 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & h_mask, z13.VnH(), i);
1829 }
1830 for (int i = 0; i < s_lane_count; i++) {
1831 ASSERT_EQUAL_SVE_LANE((42 - (3 * i)) & s_mask, z14.VnS(), i);
1832 }
1833
1834 // Integer overflow.
1835 uint8_t expected_z20[] = {0x05, 0x03, 0x01, 0xff, 0xfd};
1836 ASSERT_EQUAL_SVE(expected_z20, z20.VnB());
1837 uint16_t expected_z21[] = {0xfffb, 0xfffe, 0x0001, 0x0004, 0x0007};
1838 ASSERT_EQUAL_SVE(expected_z21, z21.VnH());
1839 uint32_t expected_z22[] = {0x80000000, 0x7fffffff, 0x7ffffffe, 0x7ffffffd};
1840 ASSERT_EQUAL_SVE(expected_z22, z22.VnS());
1841 uint64_t expected_z23[] = {0x7fffffffffffffff, 0x8000000000000006};
1842 ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
1843 }
1844}
1845
TatWai Chongc844bb22019-06-10 15:32:53 -07001846TEST(sve_int_compare_count_and_limit_scalars) {
1847 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1848 START();
1849
1850 __ Mov(w20, 0xfffffffd);
1851 __ Mov(w21, 0xffffffff);
1852
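  // Whilele and Whilelt compare their operands as signed values, Whilels and
  // Whilelo as unsigned ones. With w20 = -3 and w21 = -1, the signed
  // comparisons below hold for only the first three elements, whereas the
  // unsigned comparisons produce all-true predicates (see the expected
  // results further down).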
1853 __ Whilele(p0.VnB(), w20, w21);
1854 __ Mrs(x0, NZCV);
1855 __ Whilele(p1.VnH(), w20, w21);
1856 __ Mrs(x1, NZCV);
1857
1858 __ Mov(w20, 0xffffffff);
1859 __ Mov(w21, 0x00000000);
1860
1861 __ Whilelt(p2.VnS(), w20, w21);
1862 __ Mrs(x2, NZCV);
1863 __ Whilelt(p3.VnD(), w20, w21);
1864 __ Mrs(x3, NZCV);
1865
1866 __ Mov(w20, 0xfffffffd);
1867 __ Mov(w21, 0xffffffff);
1868
1869 __ Whilels(p4.VnB(), w20, w21);
1870 __ Mrs(x4, NZCV);
1871 __ Whilels(p5.VnH(), w20, w21);
1872 __ Mrs(x5, NZCV);
1873
1874 __ Mov(w20, 0xffffffff);
1875 __ Mov(w21, 0x00000000);
1876
1877 __ Whilelo(p6.VnS(), w20, w21);
1878 __ Mrs(x6, NZCV);
1879 __ Whilelo(p7.VnD(), w20, w21);
1880 __ Mrs(x7, NZCV);
1881
1882 __ Mov(x20, 0xfffffffffffffffd);
1883 __ Mov(x21, 0xffffffffffffffff);
1884
1885 __ Whilele(p8.VnB(), x20, x21);
1886 __ Mrs(x8, NZCV);
1887 __ Whilele(p9.VnH(), x20, x21);
1888 __ Mrs(x9, NZCV);
1889
1890 __ Mov(x20, 0xffffffffffffffff);
1891 __ Mov(x21, 0x0000000000000000);
1892
1893 __ Whilelt(p10.VnS(), x20, x21);
1894 __ Mrs(x10, NZCV);
1895 __ Whilelt(p11.VnD(), x20, x21);
1896 __ Mrs(x11, NZCV);
1897
1898 __ Mov(x20, 0xfffffffffffffffd);
1899 __ Mov(x21, 0xffffffffffffffff);
1900
1901 __ Whilels(p12.VnB(), x20, x21);
1902 __ Mrs(x12, NZCV);
1903 __ Whilels(p13.VnH(), x20, x21);
1904 __ Mrs(x13, NZCV);
1905
1906 __ Mov(x20, 0xffffffffffffffff);
1907 __ Mov(x21, 0x0000000000000000);
1908
1909 __ Whilelo(p14.VnS(), x20, x21);
1910 __ Mrs(x14, NZCV);
1911 __ Whilelo(p15.VnD(), x20, x21);
1912 __ Mrs(x15, NZCV);
1913
1914 END();
1915
1916 if (CAN_RUN()) {
1917 RUN();
1918
1919 // 0b...00000000'00000111
1920 int p0_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1921 ASSERT_EQUAL_SVE(p0_expected, p0.VnB());
1922
1923 // 0b...00000000'00010101
1924 int p1_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1925 ASSERT_EQUAL_SVE(p1_expected, p1.VnH());
1926
1927 int p2_expected[] = {0x0, 0x0, 0x0, 0x1};
1928 ASSERT_EQUAL_SVE(p2_expected, p2.VnS());
1929
1930 int p3_expected[] = {0x00, 0x01};
1931 ASSERT_EQUAL_SVE(p3_expected, p3.VnD());
1932
1933 // 0b...11111111'11111111
1934 int p4_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1935 ASSERT_EQUAL_SVE(p4_expected, p4.VnB());
1936
1937 // 0b...01010101'01010101
1938 int p5_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1939 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
1940
1941 int p6_expected[] = {0x0, 0x0, 0x0, 0x0};
1942 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
1943
1944 int p7_expected[] = {0x00, 0x00};
1945 ASSERT_EQUAL_SVE(p7_expected, p7.VnD());
1946
1947 // 0b...00000000'00000111
1948 int p8_expected[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
1949 ASSERT_EQUAL_SVE(p8_expected, p8.VnB());
1950
1951 // 0b...00000000'00010101
1952 int p9_expected[] = {0, 0, 0, 0, 0, 1, 1, 1};
1953 ASSERT_EQUAL_SVE(p9_expected, p9.VnH());
1954
1955 int p10_expected[] = {0x0, 0x0, 0x0, 0x1};
1956 ASSERT_EQUAL_SVE(p10_expected, p10.VnS());
1957
1958 int p11_expected[] = {0x00, 0x01};
1959 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
1960
1961 // 0b...11111111'11111111
1962 int p12_expected[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1963 ASSERT_EQUAL_SVE(p12_expected, p12.VnB());
1964
1965 // 0b...01010101'01010101
1966 int p13_expected[] = {1, 1, 1, 1, 1, 1, 1, 1};
1967 ASSERT_EQUAL_SVE(p13_expected, p13.VnH());
1968
1969 int p14_expected[] = {0x0, 0x0, 0x0, 0x0};
1970 ASSERT_EQUAL_SVE(p14_expected, p14.VnS());
1971
1972 int p15_expected[] = {0x00, 0x00};
1973 ASSERT_EQUAL_SVE(p15_expected, p15.VnD());
1974
1975 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w0);
1976 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w1);
1977 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w2);
1978 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w3);
1979 ASSERT_EQUAL_32(SVEFirstFlag, w4);
1980 ASSERT_EQUAL_32(SVEFirstFlag, w5);
1981 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w6);
1982 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w7);
1983 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w8);
1984 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w9);
1985 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w10);
1986 ASSERT_EQUAL_32(SVEFirstFlag | SVENotLastFlag, w11);
1987 ASSERT_EQUAL_32(SVEFirstFlag, w12);
1988 ASSERT_EQUAL_32(SVEFirstFlag, w13);
1989 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w14);
1990 ASSERT_EQUAL_32(SVENoneFlag | SVENotLastFlag, w15);
1991 }
1992}
1993
TatWai Chong302729c2019-06-14 16:18:51 -07001994TEST(sve_int_compare_vectors_signed_imm) {
1995 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
1996 START();
1997
1998 int z13_inputs[] = {0, 1, -1, -15, 126, -127, -126, -15};
1999 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 1, 1};
2000 InsrHelper(&masm, z13.VnB(), z13_inputs);
2001 Initialise(&masm, p0.VnB(), mask_inputs1);
2002
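  // Cmp<cond> with an immediate compares each lane that is active in p0 with
  // the immediate and sets the corresponding bit of the destination predicate;
  // with p0.Zeroing(), lanes that are inactive in p0 are cleared.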
2003 __ Cmpeq(p2.VnB(), p0.Zeroing(), z13.VnB(), -15);
2004 __ Mrs(x2, NZCV);
2005 __ Cmpeq(p3.VnB(), p0.Zeroing(), z13.VnB(), -127);
2006
2007 int z14_inputs[] = {0, 1, -1, -32767, -32766, 32767, 32766, 0};
2008 int mask_inputs2[] = {1, 1, 1, 0, 1, 1, 1, 1};
2009 InsrHelper(&masm, z14.VnH(), z14_inputs);
2010 Initialise(&masm, p0.VnH(), mask_inputs2);
2011
2012 __ Cmpge(p4.VnH(), p0.Zeroing(), z14.VnH(), -1);
2013 __ Mrs(x4, NZCV);
2014 __ Cmpge(p5.VnH(), p0.Zeroing(), z14.VnH(), -32767);
2015
2016 int z15_inputs[] = {0, 1, -1, INT_MIN};
2017 int mask_inputs3[] = {0, 1, 1, 1};
2018 InsrHelper(&masm, z15.VnS(), z15_inputs);
2019 Initialise(&masm, p0.VnS(), mask_inputs3);
2020
2021 __ Cmpgt(p6.VnS(), p0.Zeroing(), z15.VnS(), 0);
2022 __ Mrs(x6, NZCV);
2023 __ Cmpgt(p7.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2024
2025 __ Cmplt(p8.VnS(), p0.Zeroing(), z15.VnS(), 0);
2026 __ Mrs(x8, NZCV);
2027 __ Cmplt(p9.VnS(), p0.Zeroing(), z15.VnS(), INT_MIN + 1);
2028
2029 int64_t z16_inputs[] = {0, -1};
2030 int mask_inputs4[] = {1, 1};
2031 InsrHelper(&masm, z16.VnD(), z16_inputs);
2032 Initialise(&masm, p0.VnD(), mask_inputs4);
2033
2034 __ Cmple(p10.VnD(), p0.Zeroing(), z16.VnD(), -1);
2035 __ Mrs(x10, NZCV);
2036 __ Cmple(p11.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MIN);
2037
2038 __ Cmpne(p12.VnD(), p0.Zeroing(), z16.VnD(), -1);
2039 __ Mrs(x12, NZCV);
2040 __ Cmpne(p13.VnD(), p0.Zeroing(), z16.VnD(), LLONG_MAX);
2041
2042 END();
2043
2044 if (CAN_RUN()) {
2045 RUN();
2046
2047 int p2_expected[] = {0, 0, 0, 0, 0, 0, 0, 1};
2048 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2049
2050 int p3_expected[] = {0, 0, 0, 0, 0, 1, 0, 0};
2051 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2052
2053 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1};
2054 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2055
2056 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
2057 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2058
2059 int p6_expected[] = {0x0, 0x1, 0x0, 0x0};
2060 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2061
2062 int p7_expected[] = {0x0, 0x1, 0x1, 0x0};
2063 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2064
2065 int p8_expected[] = {0x0, 0x0, 0x1, 0x1};
2066 ASSERT_EQUAL_SVE(p8_expected, p8.VnS());
2067
2068 int p9_expected[] = {0x0, 0x0, 0x0, 0x1};
2069 ASSERT_EQUAL_SVE(p9_expected, p9.VnS());
2070
2071 int p10_expected[] = {0x00, 0x01};
2072 ASSERT_EQUAL_SVE(p10_expected, p10.VnD());
2073
2074 int p11_expected[] = {0x00, 0x00};
2075 ASSERT_EQUAL_SVE(p11_expected, p11.VnD());
2076
2077 int p12_expected[] = {0x01, 0x00};
2078 ASSERT_EQUAL_SVE(p12_expected, p12.VnD());
2079
2080 int p13_expected[] = {0x01, 0x01};
2081 ASSERT_EQUAL_SVE(p13_expected, p13.VnD());
2082
2083 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w2);
2084 ASSERT_EQUAL_32(SVEFirstFlag, w4);
2085 ASSERT_EQUAL_32(NoFlag, w6);
2086 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2087 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w10);
2088 ASSERT_EQUAL_32(NoFlag, w12);
2089 }
2090}
2091
2092TEST(sve_int_compare_vectors_unsigned_imm) {
2093 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2094 START();
2095
2096 uint32_t src1_inputs[] = {0xf7, 0x0f, 0x8f, 0x1f, 0x83, 0x12, 0x00, 0xf1};
2097 int mask_inputs1[] = {1, 1, 1, 0, 1, 1, 0, 1};
2098 InsrHelper(&masm, z13.VnB(), src1_inputs);
2099 Initialise(&masm, p0.VnB(), mask_inputs1);
2100
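  // Cmphi, Cmphs, Cmplo and Cmpls are the unsigned higher, higher-or-same,
  // lower and lower-or-same comparisons; the immediates here are treated as
  // unsigned values.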
2101 __ Cmphi(p2.VnB(), p0.Zeroing(), z13.VnB(), 0x0f);
2102 __ Mrs(x2, NZCV);
2103 __ Cmphi(p3.VnB(), p0.Zeroing(), z13.VnB(), 0xf0);
2104
2105 uint32_t src2_inputs[] = {0xffff, 0x8000, 0x1fff, 0x0000, 0x1234};
2106 int mask_inputs2[] = {1, 1, 1, 1, 0};
2107 InsrHelper(&masm, z13.VnH(), src2_inputs);
2108 Initialise(&masm, p0.VnH(), mask_inputs2);
2109
2110 __ Cmphs(p4.VnH(), p0.Zeroing(), z13.VnH(), 0x1f);
2111 __ Mrs(x4, NZCV);
2112 __ Cmphs(p5.VnH(), p0.Zeroing(), z13.VnH(), 0x1fff);
2113
2114 uint32_t src3_inputs[] = {0xffffffff, 0xfedcba98, 0x0000ffff, 0x00000000};
2115 int mask_inputs3[] = {1, 1, 1, 1};
2116 InsrHelper(&masm, z13.VnS(), src3_inputs);
2117 Initialise(&masm, p0.VnS(), mask_inputs3);
2118
2119 __ Cmplo(p6.VnS(), p0.Zeroing(), z13.VnS(), 0x3f);
2120 __ Mrs(x6, NZCV);
2121 __ Cmplo(p7.VnS(), p0.Zeroing(), z13.VnS(), 0x3f3f3f3f);
2122
2123 uint64_t src4_inputs[] = {0xffffffffffffffff, 0x0000000000000000};
2124 int mask_inputs4[] = {1, 1};
2125 InsrHelper(&masm, z13.VnD(), src4_inputs);
2126 Initialise(&masm, p0.VnD(), mask_inputs4);
2127
2128 __ Cmpls(p8.VnD(), p0.Zeroing(), z13.VnD(), 0x2f);
2129 __ Mrs(x8, NZCV);
2130 __ Cmpls(p9.VnD(), p0.Zeroing(), z13.VnD(), 0x800000000000000);
2131
2132 END();
2133
2134 if (CAN_RUN()) {
2135 RUN();
2136
2137 int p2_expected[] = {1, 0, 1, 0, 1, 1, 0, 1};
2138 ASSERT_EQUAL_SVE(p2_expected, p2.VnB());
2139
2140 int p3_expected[] = {1, 0, 0, 0, 0, 0, 0, 1};
2141 ASSERT_EQUAL_SVE(p3_expected, p3.VnB());
2142
2143 int p4_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2144 ASSERT_EQUAL_SVE(p4_expected, p4.VnH());
2145
2146 int p5_expected[] = {0x1, 0x1, 0x1, 0x0, 0x0};
2147 ASSERT_EQUAL_SVE(p5_expected, p5.VnH());
2148
2149 int p6_expected[] = {0x0, 0x0, 0x0, 0x1};
2150 ASSERT_EQUAL_SVE(p6_expected, p6.VnS());
2151
2152 int p7_expected[] = {0x0, 0x0, 0x1, 0x1};
2153 ASSERT_EQUAL_SVE(p7_expected, p7.VnS());
2154
2155 int p8_expected[] = {0x00, 0x01};
2156 ASSERT_EQUAL_SVE(p8_expected, p8.VnD());
2157
2158 int p9_expected[] = {0x00, 0x01};
2159 ASSERT_EQUAL_SVE(p9_expected, p9.VnD());
2160
2161 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2162 ASSERT_EQUAL_32(NoFlag, w4);
2163 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w6);
2164 ASSERT_EQUAL_32(SVENotLastFlag | SVEFirstFlag, w8);
2165 }
2166}
2167
TatWai Chongc844bb22019-06-10 15:32:53 -07002168TEST(sve_int_compare_conditionally_terminate_scalars) {
2169 SETUP_WITH_FEATURES(CPUFeatures::kSVE);
2170 START();
2171
2172 __ Mov(x0, 0xfedcba9887654321);
2173 __ Mov(x1, 0x1000100010001000);
2174
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002175 // Initialise Z and C. These are preserved by cterm*, and the V flag is set to
2176 // !C if the condition does not hold.
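  // In other words (a sketch of the behaviour checked below): if the condition
  // (equality for Ctermeq, inequality for Ctermne) holds, N is set and V is
  // cleared; otherwise N is cleared and V becomes !C. Z and C are preserved.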
2177 __ Mov(x10, NoFlag);
2178 __ Msr(NZCV, x10);
2179
TatWai Chongc844bb22019-06-10 15:32:53 -07002180 __ Ctermeq(w0, w0);
2181 __ Mrs(x2, NZCV);
2182 __ Ctermeq(x0, x1);
2183 __ Mrs(x3, NZCV);
2184 __ Ctermne(x0, x0);
2185 __ Mrs(x4, NZCV);
2186 __ Ctermne(w0, w1);
2187 __ Mrs(x5, NZCV);
2188
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002189 // As above, but with all flags initially set.
2190 __ Mov(x10, NZCVFlag);
2191 __ Msr(NZCV, x10);
2192
2193 __ Ctermeq(w0, w0);
2194 __ Mrs(x6, NZCV);
2195 __ Ctermeq(x0, x1);
2196 __ Mrs(x7, NZCV);
2197 __ Ctermne(x0, x0);
2198 __ Mrs(x8, NZCV);
2199 __ Ctermne(w0, w1);
2200 __ Mrs(x9, NZCV);
2201
TatWai Chongc844bb22019-06-10 15:32:53 -07002202 END();
2203
2204 if (CAN_RUN()) {
2205 RUN();
2206
2207 ASSERT_EQUAL_32(SVEFirstFlag, w2);
2208 ASSERT_EQUAL_32(VFlag, w3);
2209 ASSERT_EQUAL_32(VFlag, w4);
2210 ASSERT_EQUAL_32(SVEFirstFlag, w5);
Jacob Bramleyb40aa692019-10-07 19:24:29 +01002211
2212 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w6);
2213 ASSERT_EQUAL_32(ZCFlag, w7);
2214 ASSERT_EQUAL_32(ZCFlag, w8);
2215 ASSERT_EQUAL_32(SVEFirstFlag | ZCFlag, w9);
TatWai Chongc844bb22019-06-10 15:32:53 -07002216 }
2217}
2218
Jacob Bramley0ce75842019-07-17 18:12:50 +01002219// Work out what the architectural `PredTest` pseudocode should produce for the
2220// given result and governing predicate.
2221template <typename Tg, typename Td, int N>
2222static StatusFlags GetPredTestFlags(const Td (&pd)[N],
2223 const Tg (&pg)[N],
2224 int vl) {
2225 int first = -1;
2226 int last = -1;
2227 bool any_active = false;
2228
2229 // Only consider potentially-active lanes.
2230 int start = (N > vl) ? (N - vl) : 0;
2231 for (int i = start; i < N; i++) {
2232 if ((pg[i] & 1) == 1) {
2233 // Look for the first and last active lanes.
2234 // Note that the 'first' lane is the one with the highest index.
2235 if (last < 0) last = i;
2236 first = i;
2237 // Look for any active lanes that are also active in pd.
2238 if ((pd[i] & 1) == 1) any_active = true;
2239 }
2240 }
2241
2242 uint32_t flags = 0;
2243 if ((first >= 0) && ((pd[first] & 1) == 1)) flags |= SVEFirstFlag;
2244 if (!any_active) flags |= SVENoneFlag;
2245 if ((last < 0) || ((pd[last] & 1) == 0)) flags |= SVENotLastFlag;
2246 return static_cast<StatusFlags>(flags);
2247}
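// For reference: SVEFirstFlag (N) means the lowest-numbered lane that is
// active in pg is also set in pd, SVENoneFlag (Z) means no lane active in pg
// is set in pd, and SVENotLastFlag (C) means the highest-numbered active lane
// is not set in pd.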
2248
2249typedef void (MacroAssembler::*PfirstPnextFn)(const PRegisterWithLaneSize& pd,
2250 const PRegister& pg,
2251 const PRegisterWithLaneSize& pn);
2252template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002253static void PfirstPnextHelper(Test* config,
2254 PfirstPnextFn macro,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002255 unsigned lane_size_in_bits,
2256 const Tg& pg_inputs,
2257 const Tn& pn_inputs,
2258 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002259 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002260 START();
2261
2262 PRegister pg = p15;
2263 PRegister pn = p14;
2264 Initialise(&masm, pg.WithLaneSize(lane_size_in_bits), pg_inputs);
2265 Initialise(&masm, pn.WithLaneSize(lane_size_in_bits), pn_inputs);
2266
2267 // Initialise NZCV to an impossible value, to check that we actually write it.
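  // (The PredTest flag results never include V, so NZCVFlag cannot be produced
  // by these instructions.)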
2268 __ Mov(x10, NZCVFlag);
2269
2270 // If pd.Is(pn), the MacroAssembler simply passes the arguments directly to
2271 // the Assembler.
2272 __ Msr(NZCV, x10);
2273 __ Mov(p0, pn);
2274 (masm.*macro)(p0.WithLaneSize(lane_size_in_bits),
2275 pg,
2276 p0.WithLaneSize(lane_size_in_bits));
2277 __ Mrs(x0, NZCV);
2278
2279 // The MacroAssembler supports non-destructive use.
2280 __ Msr(NZCV, x10);
2281 (masm.*macro)(p1.WithLaneSize(lane_size_in_bits),
2282 pg,
2283 pn.WithLaneSize(lane_size_in_bits));
2284 __ Mrs(x1, NZCV);
2285
2286 // If pd.Aliases(pg) the macro requires a scratch register.
2287 {
2288 UseScratchRegisterScope temps(&masm);
2289 temps.Include(p13);
2290 __ Msr(NZCV, x10);
2291 __ Mov(p2, p15);
2292 (masm.*macro)(p2.WithLaneSize(lane_size_in_bits),
2293 p2,
2294 pn.WithLaneSize(lane_size_in_bits));
2295 __ Mrs(x2, NZCV);
2296 }
2297
2298 END();
2299
2300 if (CAN_RUN()) {
2301 RUN();
2302
2303 // Check that the inputs weren't modified.
2304 ASSERT_EQUAL_SVE(pn_inputs, pn.WithLaneSize(lane_size_in_bits));
2305 ASSERT_EQUAL_SVE(pg_inputs, pg.WithLaneSize(lane_size_in_bits));
2306
2307 // Check the primary operation.
2308 ASSERT_EQUAL_SVE(pd_expected, p0.WithLaneSize(lane_size_in_bits));
2309 ASSERT_EQUAL_SVE(pd_expected, p1.WithLaneSize(lane_size_in_bits));
2310 ASSERT_EQUAL_SVE(pd_expected, p2.WithLaneSize(lane_size_in_bits));
2311
2312 // Check that the flags were properly set.
2313 StatusFlags nzcv_expected =
2314 GetPredTestFlags(pd_expected,
2315 pg_inputs,
2316 core.GetSVELaneCount(kBRegSize));
2317 ASSERT_EQUAL_64(nzcv_expected, x0);
2318 ASSERT_EQUAL_64(nzcv_expected, x1);
2319 ASSERT_EQUAL_64(nzcv_expected, x2);
2320 }
2321}
2322
2323template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002324static void PfirstHelper(Test* config,
2325 const Tg& pg_inputs,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002326 const Tn& pn_inputs,
2327 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002328 PfirstPnextHelper(config,
2329 &MacroAssembler::Pfirst,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002330 kBRegSize, // pfirst only accepts B-sized lanes.
2331 pg_inputs,
2332 pn_inputs,
2333 pd_expected);
2334}
2335
2336template <typename Tg, typename Tn, typename Td>
Jacob Bramleye8289202019-07-31 11:25:23 +01002337static void PnextHelper(Test* config,
2338 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002339 const Tg& pg_inputs,
2340 const Tn& pn_inputs,
2341 const Td& pd_expected) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002342 PfirstPnextHelper(config,
2343 &MacroAssembler::Pnext,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002344 lane_size_in_bits,
2345 pg_inputs,
2346 pn_inputs,
2347 pd_expected);
2348}
2349
Jacob Bramleye8289202019-07-31 11:25:23 +01002350TEST_SVE(sve_pfirst) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002351 // Provide more lanes than kPRegMinSize (to check propagation if we have a
2352 // large VL), but few enough to make the test easy to read.
2353 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2354 int in1[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2355 int in2[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2356 int in3[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2357 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2358 VIXL_ASSERT(ArrayLength(in0) > kPRegMinSize);
2359
2360 // Pfirst finds the first active lane in pg, and activates the corresponding
2361 // lane in pn (if it isn't already active).
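  // Note that in these arrays the rightmost element is lane 0, so the first
  // active lane of a predicate corresponds to the rightmost 1.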
2362
2363 // The first active lane in in1 is here. |
2364 // v
2365 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2366 int exp12[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
2367 int exp13[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2368 int exp14[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002369 PfirstHelper(config, in1, in0, exp10);
2370 PfirstHelper(config, in1, in2, exp12);
2371 PfirstHelper(config, in1, in3, exp13);
2372 PfirstHelper(config, in1, in4, exp14);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002373
2374 // The first active lane in in2 is here. |
2375 // v
2376 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2377 int exp21[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0};
2378 int exp23[] = {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
2379 int exp24[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
Jacob Bramleye8289202019-07-31 11:25:23 +01002380 PfirstHelper(config, in2, in0, exp20);
2381 PfirstHelper(config, in2, in1, exp21);
2382 PfirstHelper(config, in2, in3, exp23);
2383 PfirstHelper(config, in2, in4, exp24);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002384
2385 // The first active lane in in3 is here. |
2386 // v
2387 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2388 int exp31[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
2389 int exp32[] = {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1};
2390 int exp34[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002391 PfirstHelper(config, in3, in0, exp30);
2392 PfirstHelper(config, in3, in1, exp31);
2393 PfirstHelper(config, in3, in2, exp32);
2394 PfirstHelper(config, in3, in4, exp34);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002395
2396 // | The first active lane in in4 is here.
2397 // v
2398 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2399 int exp41[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2400 int exp42[] = {1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2401 int exp43[] = {1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1};
Jacob Bramleye8289202019-07-31 11:25:23 +01002402 PfirstHelper(config, in4, in0, exp40);
2403 PfirstHelper(config, in4, in1, exp41);
2404 PfirstHelper(config, in4, in2, exp42);
2405 PfirstHelper(config, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002406
2407 // If pg is all inactive, the input is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002408 PfirstHelper(config, in0, in0, in0);
2409 PfirstHelper(config, in0, in1, in1);
2410 PfirstHelper(config, in0, in2, in2);
2411 PfirstHelper(config, in0, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002412
2413 // If the values of pg and pn match, the value is passed through unchanged.
Jacob Bramleye8289202019-07-31 11:25:23 +01002414 PfirstHelper(config, in0, in0, in0);
2415 PfirstHelper(config, in1, in1, in1);
2416 PfirstHelper(config, in2, in2, in2);
2417 PfirstHelper(config, in3, in3, in3);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002418}
2419
Jacob Bramleye8289202019-07-31 11:25:23 +01002420TEST_SVE(sve_pfirst_alias) {
2421 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002422 START();
2423
2424 // Check that the Simulator behaves correctly when all arguments are aliased.
2425 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2426 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2427 int in_s[] = {0, 1, 1, 0};
2428 int in_d[] = {1, 1};
2429
2430 Initialise(&masm, p0.VnB(), in_b);
2431 Initialise(&masm, p1.VnH(), in_h);
2432 Initialise(&masm, p2.VnS(), in_s);
2433 Initialise(&masm, p3.VnD(), in_d);
2434
2435 // Initialise NZCV to an impossible value, to check that we actually write it.
2436 __ Mov(x10, NZCVFlag);
2437
2438 __ Msr(NZCV, x10);
2439 __ Pfirst(p0.VnB(), p0.VnB(), p0.VnB());
2440 __ Mrs(x0, NZCV);
2441
2442 __ Msr(NZCV, x10);
2443 __ Pfirst(p1.VnB(), p1.VnB(), p1.VnB());
2444 __ Mrs(x1, NZCV);
2445
2446 __ Msr(NZCV, x10);
2447 __ Pfirst(p2.VnB(), p2.VnB(), p2.VnB());
2448 __ Mrs(x2, NZCV);
2449
2450 __ Msr(NZCV, x10);
2451 __ Pfirst(p3.VnB(), p3.VnB(), p3.VnB());
2452 __ Mrs(x3, NZCV);
2453
2454 END();
2455
2456 if (CAN_RUN()) {
2457 RUN();
2458
 2459 // The first active lane of pg is necessarily already active in pdn (they are
 2460 // the same register), so each P register should be unchanged.
2461 ASSERT_EQUAL_SVE(in_b, p0.VnB());
2462 ASSERT_EQUAL_SVE(in_h, p1.VnH());
2463 ASSERT_EQUAL_SVE(in_s, p2.VnS());
2464 ASSERT_EQUAL_SVE(in_d, p3.VnD());
2465
2466 ASSERT_EQUAL_64(SVEFirstFlag, x0);
2467 ASSERT_EQUAL_64(SVEFirstFlag, x1);
2468 ASSERT_EQUAL_64(SVEFirstFlag, x2);
2469 ASSERT_EQUAL_64(SVEFirstFlag, x3);
2470 }
2471}
2472
Jacob Bramleye8289202019-07-31 11:25:23 +01002473TEST_SVE(sve_pnext_b) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002474 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2475 // (to check propagation if we have a large VL), but few enough to make the
2476 // test easy to read.
2477 // For now, we just use kPRegMinSize so that the test works anywhere.
2478 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2479 int in1[] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
2480 int in2[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0};
2481 int in3[] = {0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};
2482 int in4[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2483
2484 // Pnext activates the next element that is true in pg, after the last-active
2485 // element in pn. If all pn elements are false (as in in0), it starts looking
2486 // at element 0.
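  // As in the pfirst test above, the rightmost array element is lane 0, so
  // pnext effectively searches leftwards from just above the leftmost 1 in pn.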
2487
2488 // There are no active lanes in in0, so the result is simply the first active
2489 // lane from pg.
2490 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2491 int exp10[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
2492 int exp20[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0};
2493 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
2494 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2495
2496 // The last active lane in in1 is here. |
2497 // v
2498 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2499 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2500 int exp21[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2501 int exp31[] = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2502 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2503
2504 // | The last active lane in in2 is here.
2505 // v
2506 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2507 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2508 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2509 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2510 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2511
2512 // | The last active lane in in3 is here.
2513 // v
2514 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2515 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2516 int exp23[] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2517 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2518 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2519
2520 // | The last active lane in in4 is here.
2521 // v
2522 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2523 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2524 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2525 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2526 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2527
Jacob Bramleye8289202019-07-31 11:25:23 +01002528 PnextHelper(config, kBRegSize, in0, in0, exp00);
2529 PnextHelper(config, kBRegSize, in1, in0, exp10);
2530 PnextHelper(config, kBRegSize, in2, in0, exp20);
2531 PnextHelper(config, kBRegSize, in3, in0, exp30);
2532 PnextHelper(config, kBRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002533
Jacob Bramleye8289202019-07-31 11:25:23 +01002534 PnextHelper(config, kBRegSize, in0, in1, exp01);
2535 PnextHelper(config, kBRegSize, in1, in1, exp11);
2536 PnextHelper(config, kBRegSize, in2, in1, exp21);
2537 PnextHelper(config, kBRegSize, in3, in1, exp31);
2538 PnextHelper(config, kBRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002539
Jacob Bramleye8289202019-07-31 11:25:23 +01002540 PnextHelper(config, kBRegSize, in0, in2, exp02);
2541 PnextHelper(config, kBRegSize, in1, in2, exp12);
2542 PnextHelper(config, kBRegSize, in2, in2, exp22);
2543 PnextHelper(config, kBRegSize, in3, in2, exp32);
2544 PnextHelper(config, kBRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002545
Jacob Bramleye8289202019-07-31 11:25:23 +01002546 PnextHelper(config, kBRegSize, in0, in3, exp03);
2547 PnextHelper(config, kBRegSize, in1, in3, exp13);
2548 PnextHelper(config, kBRegSize, in2, in3, exp23);
2549 PnextHelper(config, kBRegSize, in3, in3, exp33);
2550 PnextHelper(config, kBRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002551
Jacob Bramleye8289202019-07-31 11:25:23 +01002552 PnextHelper(config, kBRegSize, in0, in4, exp04);
2553 PnextHelper(config, kBRegSize, in1, in4, exp14);
2554 PnextHelper(config, kBRegSize, in2, in4, exp24);
2555 PnextHelper(config, kBRegSize, in3, in4, exp34);
2556 PnextHelper(config, kBRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002557}
2558
Jacob Bramleye8289202019-07-31 11:25:23 +01002559TEST_SVE(sve_pnext_h) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002560 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2561 // (to check propagation if we have a large VL), but few enough to make the
2562 // test easy to read.
2563 // For now, we just use kPRegMinSize so that the test works anywhere.
2564 int in0[] = {0, 0, 0, 0, 0, 0, 0, 0};
2565 int in1[] = {0, 0, 0, 1, 0, 2, 1, 0};
2566 int in2[] = {0, 1, 2, 0, 2, 0, 2, 0};
2567 int in3[] = {0, 0, 0, 3, 0, 0, 0, 3};
2568 int in4[] = {3, 0, 0, 0, 0, 0, 0, 0};
2569
2570 // Pnext activates the next element that is true in pg, after the last-active
2571 // element in pn. If all pn elements are false (as in in0), it starts looking
2572 // at element 0.
2573 //
2574 // As for other SVE instructions, elements are only considered to be active if
2575 // the _first_ bit in each field is one. Other bits are ignored.
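  // For example, the value 2 (0b10) in in1 below has its lowest bit clear, so
  // that H lane is inactive; only lanes holding odd values count as active.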
2576
2577 // There are no active lanes in in0, so the result is simply the first active
2578 // lane from pg.
2579 int exp00[] = {0, 0, 0, 0, 0, 0, 0, 0};
2580 int exp10[] = {0, 0, 0, 0, 0, 0, 1, 0};
2581 int exp20[] = {0, 1, 0, 0, 0, 0, 0, 0};
2582 int exp30[] = {0, 0, 0, 0, 0, 0, 0, 1};
2583 int exp40[] = {1, 0, 0, 0, 0, 0, 0, 0};
2584
2585 // | The last active lane in in1 is here.
2586 // v
2587 int exp01[] = {0, 0, 0, 0, 0, 0, 0, 0};
2588 int exp11[] = {0, 0, 0, 0, 0, 0, 0, 0};
2589 int exp21[] = {0, 1, 0, 0, 0, 0, 0, 0};
2590 int exp31[] = {0, 0, 0, 0, 0, 0, 0, 0};
2591 int exp41[] = {1, 0, 0, 0, 0, 0, 0, 0};
2592
2593 // | The last active lane in in2 is here.
2594 // v
2595 int exp02[] = {0, 0, 0, 0, 0, 0, 0, 0};
2596 int exp12[] = {0, 0, 0, 0, 0, 0, 0, 0};
2597 int exp22[] = {0, 0, 0, 0, 0, 0, 0, 0};
2598 int exp32[] = {0, 0, 0, 0, 0, 0, 0, 0};
2599 int exp42[] = {1, 0, 0, 0, 0, 0, 0, 0};
2600
2601 // | The last active lane in in3 is here.
2602 // v
2603 int exp03[] = {0, 0, 0, 0, 0, 0, 0, 0};
2604 int exp13[] = {0, 0, 0, 0, 0, 0, 0, 0};
2605 int exp23[] = {0, 1, 0, 0, 0, 0, 0, 0};
2606 int exp33[] = {0, 0, 0, 0, 0, 0, 0, 0};
2607 int exp43[] = {1, 0, 0, 0, 0, 0, 0, 0};
2608
2609 // | The last active lane in in4 is here.
2610 // v
2611 int exp04[] = {0, 0, 0, 0, 0, 0, 0, 0};
2612 int exp14[] = {0, 0, 0, 0, 0, 0, 0, 0};
2613 int exp24[] = {0, 0, 0, 0, 0, 0, 0, 0};
2614 int exp34[] = {0, 0, 0, 0, 0, 0, 0, 0};
2615 int exp44[] = {0, 0, 0, 0, 0, 0, 0, 0};
2616
Jacob Bramleye8289202019-07-31 11:25:23 +01002617 PnextHelper(config, kHRegSize, in0, in0, exp00);
2618 PnextHelper(config, kHRegSize, in1, in0, exp10);
2619 PnextHelper(config, kHRegSize, in2, in0, exp20);
2620 PnextHelper(config, kHRegSize, in3, in0, exp30);
2621 PnextHelper(config, kHRegSize, in4, in0, exp40);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002622
Jacob Bramleye8289202019-07-31 11:25:23 +01002623 PnextHelper(config, kHRegSize, in0, in1, exp01);
2624 PnextHelper(config, kHRegSize, in1, in1, exp11);
2625 PnextHelper(config, kHRegSize, in2, in1, exp21);
2626 PnextHelper(config, kHRegSize, in3, in1, exp31);
2627 PnextHelper(config, kHRegSize, in4, in1, exp41);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002628
Jacob Bramleye8289202019-07-31 11:25:23 +01002629 PnextHelper(config, kHRegSize, in0, in2, exp02);
2630 PnextHelper(config, kHRegSize, in1, in2, exp12);
2631 PnextHelper(config, kHRegSize, in2, in2, exp22);
2632 PnextHelper(config, kHRegSize, in3, in2, exp32);
2633 PnextHelper(config, kHRegSize, in4, in2, exp42);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002634
Jacob Bramleye8289202019-07-31 11:25:23 +01002635 PnextHelper(config, kHRegSize, in0, in3, exp03);
2636 PnextHelper(config, kHRegSize, in1, in3, exp13);
2637 PnextHelper(config, kHRegSize, in2, in3, exp23);
2638 PnextHelper(config, kHRegSize, in3, in3, exp33);
2639 PnextHelper(config, kHRegSize, in4, in3, exp43);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002640
Jacob Bramleye8289202019-07-31 11:25:23 +01002641 PnextHelper(config, kHRegSize, in0, in4, exp04);
2642 PnextHelper(config, kHRegSize, in1, in4, exp14);
2643 PnextHelper(config, kHRegSize, in2, in4, exp24);
2644 PnextHelper(config, kHRegSize, in3, in4, exp34);
2645 PnextHelper(config, kHRegSize, in4, in4, exp44);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002646}
2647
Jacob Bramleye8289202019-07-31 11:25:23 +01002648TEST_SVE(sve_pnext_s) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002649 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2650 // (to check propagation if we have a large VL), but few enough to make the
2651 // test easy to read.
2652 // For now, we just use kPRegMinSize so that the test works anywhere.
2653 int in0[] = {0xe, 0xc, 0x8, 0x0};
2654 int in1[] = {0x0, 0x2, 0x0, 0x1};
2655 int in2[] = {0x0, 0x1, 0xf, 0x0};
2656 int in3[] = {0xf, 0x0, 0x0, 0x0};
2657
2658 // Pnext activates the next element that is true in pg, after the last-active
2659 // element in pn. If all pn elements are false (as in in0), it starts looking
2660 // at element 0.
2661 //
2662 // As for other SVE instructions, elements are only considered to be active if
2663 // the _first_ bit in each field is one. Other bits are ignored.
2664
2665 // There are no active lanes in in0, so the result is simply the first active
2666 // lane from pg.
2667 int exp00[] = {0, 0, 0, 0};
2668 int exp10[] = {0, 0, 0, 1};
2669 int exp20[] = {0, 0, 1, 0};
2670 int exp30[] = {1, 0, 0, 0};
2671
2672 // | The last active lane in in1 is here.
2673 // v
2674 int exp01[] = {0, 0, 0, 0};
2675 int exp11[] = {0, 0, 0, 0};
2676 int exp21[] = {0, 0, 1, 0};
2677 int exp31[] = {1, 0, 0, 0};
2678
2679 // | The last active lane in in2 is here.
2680 // v
2681 int exp02[] = {0, 0, 0, 0};
2682 int exp12[] = {0, 0, 0, 0};
2683 int exp22[] = {0, 0, 0, 0};
2684 int exp32[] = {1, 0, 0, 0};
2685
2686 // | The last active lane in in3 is here.
2687 // v
2688 int exp03[] = {0, 0, 0, 0};
2689 int exp13[] = {0, 0, 0, 0};
2690 int exp23[] = {0, 0, 0, 0};
2691 int exp33[] = {0, 0, 0, 0};
2692
Jacob Bramleye8289202019-07-31 11:25:23 +01002693 PnextHelper(config, kSRegSize, in0, in0, exp00);
2694 PnextHelper(config, kSRegSize, in1, in0, exp10);
2695 PnextHelper(config, kSRegSize, in2, in0, exp20);
2696 PnextHelper(config, kSRegSize, in3, in0, exp30);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002697
Jacob Bramleye8289202019-07-31 11:25:23 +01002698 PnextHelper(config, kSRegSize, in0, in1, exp01);
2699 PnextHelper(config, kSRegSize, in1, in1, exp11);
2700 PnextHelper(config, kSRegSize, in2, in1, exp21);
2701 PnextHelper(config, kSRegSize, in3, in1, exp31);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002702
Jacob Bramleye8289202019-07-31 11:25:23 +01002703 PnextHelper(config, kSRegSize, in0, in2, exp02);
2704 PnextHelper(config, kSRegSize, in1, in2, exp12);
2705 PnextHelper(config, kSRegSize, in2, in2, exp22);
2706 PnextHelper(config, kSRegSize, in3, in2, exp32);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002707
Jacob Bramleye8289202019-07-31 11:25:23 +01002708 PnextHelper(config, kSRegSize, in0, in3, exp03);
2709 PnextHelper(config, kSRegSize, in1, in3, exp13);
2710 PnextHelper(config, kSRegSize, in2, in3, exp23);
2711 PnextHelper(config, kSRegSize, in3, in3, exp33);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002712}
2713
Jacob Bramleye8289202019-07-31 11:25:23 +01002714TEST_SVE(sve_pnext_d) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01002715 // TODO: Once we have the infrastructure, provide more lanes than kPRegMinSize
2716 // (to check propagation if we have a large VL), but few enough to make the
2717 // test easy to read.
2718 // For now, we just use kPRegMinSize so that the test works anywhere.
2719 int in0[] = {0xfe, 0xf0};
2720 int in1[] = {0x00, 0x55};
2721 int in2[] = {0x33, 0xff};
2722
2723 // Pnext activates the next element that is true in pg, after the last-active
2724 // element in pn. If all pn elements are false (as in in0), it starts looking
2725 // at element 0.
2726 //
2727 // As for other SVE instructions, elements are only considered to be active if
2728 // the _first_ bit in each field is one. Other bits are ignored.
2729
2730 // There are no active lanes in in0, so the result is simply the first active
2731 // lane from pg.
2732 int exp00[] = {0, 0};
2733 int exp10[] = {0, 1};
2734 int exp20[] = {0, 1};
2735
2736 // | The last active lane in in1 is here.
2737 // v
2738 int exp01[] = {0, 0};
2739 int exp11[] = {0, 0};
2740 int exp21[] = {1, 0};
2741
2742 // | The last active lane in in2 is here.
2743 // v
2744 int exp02[] = {0, 0};
2745 int exp12[] = {0, 0};
2746 int exp22[] = {0, 0};
2747
Jacob Bramleye8289202019-07-31 11:25:23 +01002748 PnextHelper(config, kDRegSize, in0, in0, exp00);
2749 PnextHelper(config, kDRegSize, in1, in0, exp10);
2750 PnextHelper(config, kDRegSize, in2, in0, exp20);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002751
Jacob Bramleye8289202019-07-31 11:25:23 +01002752 PnextHelper(config, kDRegSize, in0, in1, exp01);
2753 PnextHelper(config, kDRegSize, in1, in1, exp11);
2754 PnextHelper(config, kDRegSize, in2, in1, exp21);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002755
Jacob Bramleye8289202019-07-31 11:25:23 +01002756 PnextHelper(config, kDRegSize, in0, in2, exp02);
2757 PnextHelper(config, kDRegSize, in1, in2, exp12);
2758 PnextHelper(config, kDRegSize, in2, in2, exp22);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002759}
2760
Jacob Bramleye8289202019-07-31 11:25:23 +01002761TEST_SVE(sve_pnext_alias) {
2762 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002763 START();
2764
2765 // Check that the Simulator behaves correctly when all arguments are aliased.
2766 int in_b[] = {0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0};
2767 int in_h[] = {0, 0, 0, 0, 1, 1, 0, 0};
2768 int in_s[] = {0, 1, 1, 0};
2769 int in_d[] = {1, 1};
2770
2771 Initialise(&masm, p0.VnB(), in_b);
2772 Initialise(&masm, p1.VnH(), in_h);
2773 Initialise(&masm, p2.VnS(), in_s);
2774 Initialise(&masm, p3.VnD(), in_d);
2775
2776 // Initialise NZCV to an impossible value, to check that we actually write it.
2777 __ Mov(x10, NZCVFlag);
2778
2779 __ Msr(NZCV, x10);
2780 __ Pnext(p0.VnB(), p0.VnB(), p0.VnB());
2781 __ Mrs(x0, NZCV);
2782
2783 __ Msr(NZCV, x10);
2784 __ Pnext(p1.VnB(), p1.VnB(), p1.VnB());
2785 __ Mrs(x1, NZCV);
2786
2787 __ Msr(NZCV, x10);
2788 __ Pnext(p2.VnB(), p2.VnB(), p2.VnB());
2789 __ Mrs(x2, NZCV);
2790
2791 __ Msr(NZCV, x10);
2792 __ Pnext(p3.VnB(), p3.VnB(), p3.VnB());
2793 __ Mrs(x3, NZCV);
2794
2795 END();
2796
2797 if (CAN_RUN()) {
2798 RUN();
2799
2800 // Since pg.Is(pdn), there can be no active lanes in pg above the last
2801 // active lane in pdn, so the result should always be zero.
2802 ASSERT_EQUAL_SVE(0, p0.VnB());
2803 ASSERT_EQUAL_SVE(0, p1.VnH());
2804 ASSERT_EQUAL_SVE(0, p2.VnS());
2805 ASSERT_EQUAL_SVE(0, p3.VnD());
2806
2807 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x0);
2808 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x1);
2809 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x2);
2810 ASSERT_EQUAL_64(SVENoneFlag | SVENotLastFlag, x3);
2811 }
2812}
2813
Jacob Bramleye8289202019-07-31 11:25:23 +01002814static void PtrueHelper(Test* config,
2815 unsigned lane_size_in_bits,
Jacob Bramley0ce75842019-07-17 18:12:50 +01002816 FlagsUpdate s = LeaveFlags) {
Jacob Bramleye8289202019-07-31 11:25:23 +01002817 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002818 START();
2819
2820 PRegisterWithLaneSize p[kNumberOfPRegisters];
2821 for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
2822 p[i] = PRegister(i).WithLaneSize(lane_size_in_bits);
2823 }
2824
2825 // Initialise NZCV to an impossible value, to check that we actually write it.
2826 StatusFlags nzcv_unmodified = NZCVFlag;
2827 __ Mov(x20, nzcv_unmodified);
2828
2829 // We don't have enough registers to conveniently test every pattern, so take
2830 // samples from each group.
2831 __ Msr(NZCV, x20);
2832 __ Ptrue(p[0], SVE_POW2, s);
2833 __ Mrs(x0, NZCV);
2834
2835 __ Msr(NZCV, x20);
2836 __ Ptrue(p[1], SVE_VL1, s);
2837 __ Mrs(x1, NZCV);
2838
2839 __ Msr(NZCV, x20);
2840 __ Ptrue(p[2], SVE_VL2, s);
2841 __ Mrs(x2, NZCV);
2842
2843 __ Msr(NZCV, x20);
2844 __ Ptrue(p[3], SVE_VL5, s);
2845 __ Mrs(x3, NZCV);
2846
2847 __ Msr(NZCV, x20);
2848 __ Ptrue(p[4], SVE_VL6, s);
2849 __ Mrs(x4, NZCV);
2850
2851 __ Msr(NZCV, x20);
2852 __ Ptrue(p[5], SVE_VL8, s);
2853 __ Mrs(x5, NZCV);
2854
2855 __ Msr(NZCV, x20);
2856 __ Ptrue(p[6], SVE_VL16, s);
2857 __ Mrs(x6, NZCV);
2858
2859 __ Msr(NZCV, x20);
2860 __ Ptrue(p[7], SVE_VL64, s);
2861 __ Mrs(x7, NZCV);
2862
2863 __ Msr(NZCV, x20);
2864 __ Ptrue(p[8], SVE_VL256, s);
2865 __ Mrs(x8, NZCV);
2866
2867 {
 2868 // We have to use the Assembler to encode pattern values not defined by
 2869 // SVEPredicateConstraint, so call `ptrue`/`ptrues` directly.
2870 typedef void (
2871 MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
2872 int pattern);
2873 AssemblePtrueFn assemble =
2874 (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
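    // Encodings 0xe, 0x16, 0x1a and 0x1c are unallocated pattern values; they
    // are expected to behave like an empty pattern, which is why the
    // corresponding predicates and flags checked below are all zero.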
2875
2876 ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
2877 __ msr(NZCV, x20);
2878 (masm.*assemble)(p[9], 0xe);
2879 __ mrs(x9, NZCV);
2880
2881 __ msr(NZCV, x20);
2882 (masm.*assemble)(p[10], 0x16);
2883 __ mrs(x10, NZCV);
2884
2885 __ msr(NZCV, x20);
2886 (masm.*assemble)(p[11], 0x1a);
2887 __ mrs(x11, NZCV);
2888
2889 __ msr(NZCV, x20);
2890 (masm.*assemble)(p[12], 0x1c);
2891 __ mrs(x12, NZCV);
2892 }
2893
2894 __ Msr(NZCV, x20);
2895 __ Ptrue(p[13], SVE_MUL4, s);
2896 __ Mrs(x13, NZCV);
2897
2898 __ Msr(NZCV, x20);
2899 __ Ptrue(p[14], SVE_MUL3, s);
2900 __ Mrs(x14, NZCV);
2901
2902 __ Msr(NZCV, x20);
2903 __ Ptrue(p[15], SVE_ALL, s);
2904 __ Mrs(x15, NZCV);
2905
2906 END();
2907
2908 if (CAN_RUN()) {
2909 RUN();
2910
2911 int all = core.GetSVELaneCount(lane_size_in_bits);
2912 int pow2 = 1 << HighestSetBitPosition(all);
2913 int mul4 = all - (all % 4);
2914 int mul3 = all - (all % 3);
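    // For example, at a 384-bit VL with D lanes: all = 6, pow2 = 4, mul4 = 4
    // and mul3 = 6, so SVE_POW2 activates four lanes while SVE_MUL3 activates
    // all six.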
2915
2916 // Check P register results.
2917 for (int i = 0; i < all; i++) {
2918 ASSERT_EQUAL_SVE_LANE(i < pow2, p[0], i);
2919 ASSERT_EQUAL_SVE_LANE((all >= 1) && (i < 1), p[1], i);
2920 ASSERT_EQUAL_SVE_LANE((all >= 2) && (i < 2), p[2], i);
2921 ASSERT_EQUAL_SVE_LANE((all >= 5) && (i < 5), p[3], i);
2922 ASSERT_EQUAL_SVE_LANE((all >= 6) && (i < 6), p[4], i);
2923 ASSERT_EQUAL_SVE_LANE((all >= 8) && (i < 8), p[5], i);
2924 ASSERT_EQUAL_SVE_LANE((all >= 16) && (i < 16), p[6], i);
2925 ASSERT_EQUAL_SVE_LANE((all >= 64) && (i < 64), p[7], i);
2926 ASSERT_EQUAL_SVE_LANE((all >= 256) && (i < 256), p[8], i);
2927 ASSERT_EQUAL_SVE_LANE(false, p[9], i);
2928 ASSERT_EQUAL_SVE_LANE(false, p[10], i);
2929 ASSERT_EQUAL_SVE_LANE(false, p[11], i);
2930 ASSERT_EQUAL_SVE_LANE(false, p[12], i);
2931 ASSERT_EQUAL_SVE_LANE(i < mul4, p[13], i);
2932 ASSERT_EQUAL_SVE_LANE(i < mul3, p[14], i);
2933 ASSERT_EQUAL_SVE_LANE(true, p[15], i);
2934 }
2935
2936 // Check NZCV results.
2937 if (s == LeaveFlags) {
2938 // No flags should have been updated.
2939 for (int i = 0; i <= 15; i++) {
2940 ASSERT_EQUAL_64(nzcv_unmodified, XRegister(i));
2941 }
2942 } else {
2943 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
2944 StatusFlags nonzero = SVEFirstFlag;
2945
2946 // POW2
2947 ASSERT_EQUAL_64(nonzero, x0);
2948 // VL*
2949 ASSERT_EQUAL_64((all >= 1) ? nonzero : zero, x1);
2950 ASSERT_EQUAL_64((all >= 2) ? nonzero : zero, x2);
2951 ASSERT_EQUAL_64((all >= 5) ? nonzero : zero, x3);
2952 ASSERT_EQUAL_64((all >= 6) ? nonzero : zero, x4);
2953 ASSERT_EQUAL_64((all >= 8) ? nonzero : zero, x5);
2954 ASSERT_EQUAL_64((all >= 16) ? nonzero : zero, x6);
2955 ASSERT_EQUAL_64((all >= 64) ? nonzero : zero, x7);
2956 ASSERT_EQUAL_64((all >= 256) ? nonzero : zero, x8);
2957 // #uimm5
2958 ASSERT_EQUAL_64(zero, x9);
2959 ASSERT_EQUAL_64(zero, x10);
2960 ASSERT_EQUAL_64(zero, x11);
2961 ASSERT_EQUAL_64(zero, x12);
2962 // MUL*
2963 ASSERT_EQUAL_64((all >= 4) ? nonzero : zero, x13);
2964 ASSERT_EQUAL_64((all >= 3) ? nonzero : zero, x14);
2965 // ALL
2966 ASSERT_EQUAL_64(nonzero, x15);
2967 }
2968 }
2969}
2970
Jacob Bramleye8289202019-07-31 11:25:23 +01002971TEST_SVE(sve_ptrue_b) { PtrueHelper(config, kBRegSize, LeaveFlags); }
2972TEST_SVE(sve_ptrue_h) { PtrueHelper(config, kHRegSize, LeaveFlags); }
2973TEST_SVE(sve_ptrue_s) { PtrueHelper(config, kSRegSize, LeaveFlags); }
2974TEST_SVE(sve_ptrue_d) { PtrueHelper(config, kDRegSize, LeaveFlags); }
2975
Jacob Bramleye8289202019-07-31 11:25:23 +01002976TEST_SVE(sve_ptrues_b) { PtrueHelper(config, kBRegSize, SetFlags); }
2977TEST_SVE(sve_ptrues_h) { PtrueHelper(config, kHRegSize, SetFlags); }
2978TEST_SVE(sve_ptrues_s) { PtrueHelper(config, kSRegSize, SetFlags); }
2979TEST_SVE(sve_ptrues_d) { PtrueHelper(config, kDRegSize, SetFlags); }
2980
Jacob Bramleye8289202019-07-31 11:25:23 +01002981TEST_SVE(sve_pfalse) {
2982 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01002983 START();
2984
2985 // Initialise non-zero inputs.
2986 __ Ptrue(p0.VnB());
2987 __ Ptrue(p1.VnH());
2988 __ Ptrue(p2.VnS());
2989 __ Ptrue(p3.VnD());
2990
2991 // The instruction only supports B-sized lanes, but the lane size has no
2992 // logical effect, so the MacroAssembler accepts anything.
2993 __ Pfalse(p0.VnB());
2994 __ Pfalse(p1.VnH());
2995 __ Pfalse(p2.VnS());
2996 __ Pfalse(p3.VnD());
2997
2998 END();
2999
3000 if (CAN_RUN()) {
3001 RUN();
3002
3003 ASSERT_EQUAL_SVE(0, p0.VnB());
3004 ASSERT_EQUAL_SVE(0, p1.VnB());
3005 ASSERT_EQUAL_SVE(0, p2.VnB());
3006 ASSERT_EQUAL_SVE(0, p3.VnB());
3007 }
3008}
3009
Jacob Bramleye8289202019-07-31 11:25:23 +01003010TEST_SVE(sve_ptest) {
3011 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley0ce75842019-07-17 18:12:50 +01003012 START();
3013
3014 // Initialise NZCV to a known (impossible) value.
3015 StatusFlags nzcv_unmodified = NZCVFlag;
3016 __ Mov(x0, nzcv_unmodified);
3017 __ Msr(NZCV, x0);
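  // (This value can never be produced by `Ptest`, because SVE flag-setting
  // operations always leave V clear.)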
3018
3019 // Construct some test inputs.
3020 int in2[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0};
3021 int in3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0};
3022 int in4[] = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0};
3023 __ Pfalse(p0.VnB());
3024 __ Ptrue(p1.VnB());
3025 Initialise(&masm, p2.VnB(), in2);
3026 Initialise(&masm, p3.VnB(), in3);
3027 Initialise(&masm, p4.VnB(), in4);
3028
3029 // All-inactive pg.
3030 __ Ptest(p0, p0.VnB());
3031 __ Mrs(x0, NZCV);
3032 __ Ptest(p0, p1.VnB());
3033 __ Mrs(x1, NZCV);
3034 __ Ptest(p0, p2.VnB());
3035 __ Mrs(x2, NZCV);
3036 __ Ptest(p0, p3.VnB());
3037 __ Mrs(x3, NZCV);
3038 __ Ptest(p0, p4.VnB());
3039 __ Mrs(x4, NZCV);
3040
3041 // All-active pg.
3042 __ Ptest(p1, p0.VnB());
3043 __ Mrs(x5, NZCV);
3044 __ Ptest(p1, p1.VnB());
3045 __ Mrs(x6, NZCV);
3046 __ Ptest(p1, p2.VnB());
3047 __ Mrs(x7, NZCV);
3048 __ Ptest(p1, p3.VnB());
3049 __ Mrs(x8, NZCV);
3050 __ Ptest(p1, p4.VnB());
3051 __ Mrs(x9, NZCV);
3052
3053 // Combinations of other inputs.
3054 __ Ptest(p2, p2.VnB());
3055 __ Mrs(x20, NZCV);
3056 __ Ptest(p2, p3.VnB());
3057 __ Mrs(x21, NZCV);
3058 __ Ptest(p2, p4.VnB());
3059 __ Mrs(x22, NZCV);
3060 __ Ptest(p3, p2.VnB());
3061 __ Mrs(x23, NZCV);
3062 __ Ptest(p3, p3.VnB());
3063 __ Mrs(x24, NZCV);
3064 __ Ptest(p3, p4.VnB());
3065 __ Mrs(x25, NZCV);
3066 __ Ptest(p4, p2.VnB());
3067 __ Mrs(x26, NZCV);
3068 __ Ptest(p4, p3.VnB());
3069 __ Mrs(x27, NZCV);
3070 __ Ptest(p4, p4.VnB());
3071 __ Mrs(x28, NZCV);
3072
3073 END();
3074
3075 if (CAN_RUN()) {
3076 RUN();
3077
3078 StatusFlags zero = static_cast<StatusFlags>(SVENoneFlag | SVENotLastFlag);
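    // Ptest only considers lanes where pg is active: N is set if pn is active
    // in the first such lane, Z if pn is active in none of them, and C if pn
    // is inactive in the last of them.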
3079
3080 // If pg is all inactive, the value of pn is irrelevant.
3081 ASSERT_EQUAL_64(zero, x0);
3082 ASSERT_EQUAL_64(zero, x1);
3083 ASSERT_EQUAL_64(zero, x2);
3084 ASSERT_EQUAL_64(zero, x3);
3085 ASSERT_EQUAL_64(zero, x4);
3086
3087 // All-active pg.
3088 ASSERT_EQUAL_64(zero, x5); // All-inactive pn.
3089 ASSERT_EQUAL_64(SVEFirstFlag, x6); // All-active pn.
3090 // Other pn inputs are non-zero, but the first and last lanes are inactive.
3091 ASSERT_EQUAL_64(SVENotLastFlag, x7);
3092 ASSERT_EQUAL_64(SVENotLastFlag, x8);
3093 ASSERT_EQUAL_64(SVENotLastFlag, x9);
3094
3095 // Other inputs.
3096 ASSERT_EQUAL_64(SVEFirstFlag, x20); // pg: in2, pn: in2
3097 ASSERT_EQUAL_64(NoFlag, x21); // pg: in2, pn: in3
3098 ASSERT_EQUAL_64(zero, x22); // pg: in2, pn: in4
3099 ASSERT_EQUAL_64(static_cast<StatusFlags>(SVEFirstFlag | SVENotLastFlag),
3100 x23); // pg: in3, pn: in2
3101 ASSERT_EQUAL_64(SVEFirstFlag, x24); // pg: in3, pn: in3
3102 ASSERT_EQUAL_64(zero, x25); // pg: in3, pn: in4
3103 ASSERT_EQUAL_64(zero, x26); // pg: in4, pn: in2
3104 ASSERT_EQUAL_64(zero, x27); // pg: in4, pn: in3
3105 ASSERT_EQUAL_64(SVEFirstFlag, x28); // pg: in4, pn: in4
3106 }
3107}
3108
Jacob Bramleye8289202019-07-31 11:25:23 +01003109TEST_SVE(sve_cntp) {
3110 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01003111 START();
3112
3113 // There are {7, 5, 2, 1} active {B, H, S, D} lanes.
3114 int p0_inputs[] = {0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0};
3115 Initialise(&masm, p0.VnB(), p0_inputs);
3116
3117 // With an all-true predicate, these instructions measure the vector length.
3118 __ Ptrue(p10.VnB());
3119 __ Ptrue(p11.VnH());
3120 __ Ptrue(p12.VnS());
3121 __ Ptrue(p13.VnD());
3122
3123 // `ptrue p10.b` provides an all-active pg.
3124 __ Cntp(x10, p10, p10.VnB());
3125 __ Cntp(x11, p10, p11.VnH());
3126 __ Cntp(x12, p10, p12.VnS());
3127 __ Cntp(x13, p10, p13.VnD());
3128
3129 // Check that the predicate mask is applied properly.
3130 __ Cntp(x14, p10, p10.VnB());
3131 __ Cntp(x15, p11, p10.VnB());
3132 __ Cntp(x16, p12, p10.VnB());
3133 __ Cntp(x17, p13, p10.VnB());
3134
3135 // Check other patterns (including some ignored bits).
3136 __ Cntp(x0, p10, p0.VnB());
3137 __ Cntp(x1, p10, p0.VnH());
3138 __ Cntp(x2, p10, p0.VnS());
3139 __ Cntp(x3, p10, p0.VnD());
3140 __ Cntp(x4, p0, p10.VnB());
3141 __ Cntp(x5, p0, p10.VnH());
3142 __ Cntp(x6, p0, p10.VnS());
3143 __ Cntp(x7, p0, p10.VnD());
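  // `Cntp(xd, pg, pn)` counts the lanes that are active in both pg and pn, so
  // swapping p0 and p10 between the two groups above must give the same
  // counts.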
3144
3145 END();
3146
3147 if (CAN_RUN()) {
3148 RUN();
3149
3150 int vl_b = core.GetSVELaneCount(kBRegSize);
3151 int vl_h = core.GetSVELaneCount(kHRegSize);
3152 int vl_s = core.GetSVELaneCount(kSRegSize);
3153 int vl_d = core.GetSVELaneCount(kDRegSize);
3154
3155 // Check all-active predicates in various combinations.
3156 ASSERT_EQUAL_64(vl_b, x10);
3157 ASSERT_EQUAL_64(vl_h, x11);
3158 ASSERT_EQUAL_64(vl_s, x12);
3159 ASSERT_EQUAL_64(vl_d, x13);
3160
3161 ASSERT_EQUAL_64(vl_b, x14);
3162 ASSERT_EQUAL_64(vl_h, x15);
3163 ASSERT_EQUAL_64(vl_s, x16);
3164 ASSERT_EQUAL_64(vl_d, x17);
3165
3166 // Check that irrelevant bits are properly ignored.
3167 ASSERT_EQUAL_64(7, x0);
3168 ASSERT_EQUAL_64(5, x1);
3169 ASSERT_EQUAL_64(2, x2);
3170 ASSERT_EQUAL_64(1, x3);
3171
3172 ASSERT_EQUAL_64(7, x4);
3173 ASSERT_EQUAL_64(5, x5);
3174 ASSERT_EQUAL_64(2, x6);
3175 ASSERT_EQUAL_64(1, x7);
3176 }
3177}
3178
Martyn Capewell74f84f62019-10-30 15:30:44 +00003179typedef void (MacroAssembler::*CntFn)(const Register& dst,
3180 int pattern,
3181 int multiplier);
3182
3183static void CntHelper(Test* config,
3184 CntFn cnt,
3185 int multiplier,
Martyn Capewell579c92d2019-10-30 17:48:52 +00003186 int lane_size_in_bits,
3187 int64_t acc_value = 0,
3188 bool is_increment = true) {
Martyn Capewell74f84f62019-10-30 15:30:44 +00003189 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3190 START();
3191
Martyn Capewell579c92d2019-10-30 17:48:52 +00003192 // Initialise accumulators.
3193 __ Mov(x0, acc_value);
3194 __ Mov(x1, acc_value);
3195 __ Mov(x2, acc_value);
3196 __ Mov(x3, acc_value);
3197 __ Mov(x4, acc_value);
3198 __ Mov(x5, acc_value);
3199 __ Mov(x6, acc_value);
3200 __ Mov(x7, acc_value);
3201 __ Mov(x8, acc_value);
3202 __ Mov(x9, acc_value);
3203 __ Mov(x10, acc_value);
3204 __ Mov(x11, acc_value);
3205 __ Mov(x12, acc_value);
3206 __ Mov(x13, acc_value);
3207 __ Mov(x14, acc_value);
3208 __ Mov(x15, acc_value);
3209 __ Mov(x18, acc_value);
3210 __ Mov(x19, acc_value);
3211 __ Mov(x20, acc_value);
3212 __ Mov(x21, acc_value);
3213
3214 (masm.*cnt)(x0, SVE_POW2, multiplier);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003215 (masm.*cnt)(x1, SVE_VL1, multiplier);
3216 (masm.*cnt)(x2, SVE_VL2, multiplier);
3217 (masm.*cnt)(x3, SVE_VL3, multiplier);
3218 (masm.*cnt)(x4, SVE_VL4, multiplier);
3219 (masm.*cnt)(x5, SVE_VL5, multiplier);
3220 (masm.*cnt)(x6, SVE_VL6, multiplier);
3221 (masm.*cnt)(x7, SVE_VL7, multiplier);
3222 (masm.*cnt)(x8, SVE_VL8, multiplier);
3223 (masm.*cnt)(x9, SVE_VL16, multiplier);
3224 (masm.*cnt)(x10, SVE_VL32, multiplier);
3225 (masm.*cnt)(x11, SVE_VL64, multiplier);
3226 (masm.*cnt)(x12, SVE_VL128, multiplier);
3227 (masm.*cnt)(x13, SVE_VL256, multiplier);
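  // 16, 23 and 28 are unallocated pattern encodings, which select zero
  // elements.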
3228 (masm.*cnt)(x14, 16, multiplier);
3229 (masm.*cnt)(x15, 23, multiplier);
3230 (masm.*cnt)(x18, 28, multiplier);
3231 (masm.*cnt)(x19, SVE_MUL4, multiplier);
3232 (masm.*cnt)(x20, SVE_MUL3, multiplier);
3233 (masm.*cnt)(x21, SVE_ALL, multiplier);
3234
3235 END();
3236
3237 if (CAN_RUN()) {
3238 RUN();
3239
3240 int all = core.GetSVELaneCount(lane_size_in_bits);
3241 int pow2 = 1 << HighestSetBitPosition(all);
3242 int mul4 = all - (all % 4);
3243 int mul3 = all - (all % 3);
3244
Martyn Capewell579c92d2019-10-30 17:48:52 +00003245 multiplier = is_increment ? multiplier : -multiplier;
3246
3247 ASSERT_EQUAL_64(acc_value + (multiplier * pow2), x0);
3248 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 1 ? 1 : 0)), x1);
3249 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 2 ? 2 : 0)), x2);
3250 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 3 ? 3 : 0)), x3);
3251 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 4 ? 4 : 0)), x4);
3252 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 5 ? 5 : 0)), x5);
3253 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 6 ? 6 : 0)), x6);
3254 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 7 ? 7 : 0)), x7);
3255 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 8 ? 8 : 0)), x8);
3256 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 16 ? 16 : 0)), x9);
3257 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 32 ? 32 : 0)), x10);
3258 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 64 ? 64 : 0)), x11);
3259 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 128 ? 128 : 0)), x12);
3260 ASSERT_EQUAL_64(acc_value + (multiplier * (all >= 256 ? 256 : 0)), x13);
3261 ASSERT_EQUAL_64(acc_value, x14);
3262 ASSERT_EQUAL_64(acc_value, x15);
3263 ASSERT_EQUAL_64(acc_value, x18);
3264 ASSERT_EQUAL_64(acc_value + (multiplier * mul4), x19);
3265 ASSERT_EQUAL_64(acc_value + (multiplier * mul3), x20);
3266 ASSERT_EQUAL_64(acc_value + (multiplier * all), x21);
Martyn Capewell74f84f62019-10-30 15:30:44 +00003267 }
3268}
3269
Martyn Capewell579c92d2019-10-30 17:48:52 +00003270static void IncHelper(Test* config,
3271 CntFn cnt,
3272 int multiplier,
3273 int lane_size_in_bits,
3274 int64_t acc_value) {
3275 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, true);
3276}
3277
3278static void DecHelper(Test* config,
3279 CntFn cnt,
3280 int multiplier,
3281 int lane_size_in_bits,
3282 int64_t acc_value) {
3283 CntHelper(config, cnt, multiplier, lane_size_in_bits, acc_value, false);
3284}
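// In generated code, these macros are typically used to step pointers or loop
// counters by a whole vector's worth of lanes. A minimal usage sketch (not
// part of this test):
//
//   __ Incb(x0, SVE_ALL, 1);  // x0 += the number of bytes in a Z register.
//   __ Decd(x1, SVE_ALL, 2);  // x1 -= 2 * (the number of D lanes).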
3285
Martyn Capewell74f84f62019-10-30 15:30:44 +00003286TEST_SVE(sve_cntb) {
3287 CntHelper(config, &MacroAssembler::Cntb, 1, kBRegSize);
3288 CntHelper(config, &MacroAssembler::Cntb, 2, kBRegSize);
3289 CntHelper(config, &MacroAssembler::Cntb, 15, kBRegSize);
3290 CntHelper(config, &MacroAssembler::Cntb, 16, kBRegSize);
3291}
3292
3293TEST_SVE(sve_cnth) {
3294 CntHelper(config, &MacroAssembler::Cnth, 1, kHRegSize);
3295 CntHelper(config, &MacroAssembler::Cnth, 2, kHRegSize);
3296 CntHelper(config, &MacroAssembler::Cnth, 15, kHRegSize);
3297 CntHelper(config, &MacroAssembler::Cnth, 16, kHRegSize);
3298}
3299
3300TEST_SVE(sve_cntw) {
3301 CntHelper(config, &MacroAssembler::Cntw, 1, kWRegSize);
3302 CntHelper(config, &MacroAssembler::Cntw, 2, kWRegSize);
3303 CntHelper(config, &MacroAssembler::Cntw, 15, kWRegSize);
3304 CntHelper(config, &MacroAssembler::Cntw, 16, kWRegSize);
3305}
3306
3307TEST_SVE(sve_cntd) {
3308 CntHelper(config, &MacroAssembler::Cntd, 1, kDRegSize);
3309 CntHelper(config, &MacroAssembler::Cntd, 2, kDRegSize);
3310 CntHelper(config, &MacroAssembler::Cntd, 15, kDRegSize);
3311 CntHelper(config, &MacroAssembler::Cntd, 16, kDRegSize);
3312}
3313
Martyn Capewell579c92d2019-10-30 17:48:52 +00003314TEST_SVE(sve_decb) {
3315 DecHelper(config, &MacroAssembler::Decb, 1, kBRegSize, 42);
3316 DecHelper(config, &MacroAssembler::Decb, 2, kBRegSize, -1);
3317 DecHelper(config, &MacroAssembler::Decb, 15, kBRegSize, INT64_MIN);
3318 DecHelper(config, &MacroAssembler::Decb, 16, kBRegSize, -42);
3319}
3320
3321TEST_SVE(sve_dech) {
3322 DecHelper(config, &MacroAssembler::Dech, 1, kHRegSize, 42);
3323 DecHelper(config, &MacroAssembler::Dech, 2, kHRegSize, -1);
3324 DecHelper(config, &MacroAssembler::Dech, 15, kHRegSize, INT64_MIN);
3325 DecHelper(config, &MacroAssembler::Dech, 16, kHRegSize, -42);
3326}
3327
3328TEST_SVE(sve_decw) {
3329 DecHelper(config, &MacroAssembler::Decw, 1, kWRegSize, 42);
3330 DecHelper(config, &MacroAssembler::Decw, 2, kWRegSize, -1);
3331 DecHelper(config, &MacroAssembler::Decw, 15, kWRegSize, INT64_MIN);
3332 DecHelper(config, &MacroAssembler::Decw, 16, kWRegSize, -42);
3333}
3334
3335TEST_SVE(sve_decd) {
3336 DecHelper(config, &MacroAssembler::Decd, 1, kDRegSize, 42);
3337 DecHelper(config, &MacroAssembler::Decd, 2, kDRegSize, -1);
3338 DecHelper(config, &MacroAssembler::Decd, 15, kDRegSize, INT64_MIN);
3339 DecHelper(config, &MacroAssembler::Decd, 16, kDRegSize, -42);
3340}
3341
3342TEST_SVE(sve_incb) {
3343 IncHelper(config, &MacroAssembler::Incb, 1, kBRegSize, 42);
3344 IncHelper(config, &MacroAssembler::Incb, 2, kBRegSize, -1);
3345 IncHelper(config, &MacroAssembler::Incb, 15, kBRegSize, INT64_MAX);
3346 IncHelper(config, &MacroAssembler::Incb, 16, kBRegSize, -42);
3347}
3348
3349TEST_SVE(sve_inch) {
3350 IncHelper(config, &MacroAssembler::Inch, 1, kHRegSize, 42);
3351 IncHelper(config, &MacroAssembler::Inch, 2, kHRegSize, -1);
3352 IncHelper(config, &MacroAssembler::Inch, 15, kHRegSize, INT64_MAX);
3353 IncHelper(config, &MacroAssembler::Inch, 16, kHRegSize, -42);
3354}
3355
3356TEST_SVE(sve_incw) {
3357 IncHelper(config, &MacroAssembler::Incw, 1, kWRegSize, 42);
3358 IncHelper(config, &MacroAssembler::Incw, 2, kWRegSize, -1);
3359 IncHelper(config, &MacroAssembler::Incw, 15, kWRegSize, INT64_MAX);
3360 IncHelper(config, &MacroAssembler::Incw, 16, kWRegSize, -42);
3361}
3362
3363TEST_SVE(sve_incd) {
3364 IncHelper(config, &MacroAssembler::Incd, 1, kDRegSize, 42);
3365 IncHelper(config, &MacroAssembler::Incd, 2, kDRegSize, -1);
3366 IncHelper(config, &MacroAssembler::Incd, 15, kDRegSize, INT64_MAX);
3367 IncHelper(config, &MacroAssembler::Incd, 16, kDRegSize, -42);
3368}
3369
TatWai Chong7a0d3672019-10-23 17:35:18 -07003370typedef void (MacroAssembler::*ArithPredicatedFn)(const ZRegister& zd,
3371 const PRegisterM& pg,
3372 const ZRegister& zn,
3373 const ZRegister& zm);
3374
3375template <typename Td, typename Tg, typename Tn>
3376static void IntBinArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07003377 ArithPredicatedFn macro,
TatWai Chong13634762019-07-16 16:20:45 -07003378 unsigned lane_size_in_bits,
3379 const Tg& pg_inputs,
3380 const Tn& zn_inputs,
3381 const Tn& zm_inputs,
3382 const Td& zd_expected) {
3383 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3384 START();
3385
3386 ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
3387 ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
3388 InsrHelper(&masm, src_a, zn_inputs);
3389 InsrHelper(&masm, src_b, zm_inputs);
3390
3391 Initialise(&masm, p0.WithLaneSize(lane_size_in_bits), pg_inputs);
3392
3393 ZRegister zd_1 = z0.WithLaneSize(lane_size_in_bits);
3394 ZRegister zd_2 = z1.WithLaneSize(lane_size_in_bits);
3395 ZRegister zd_3 = z2.WithLaneSize(lane_size_in_bits);
3396
3397 // `instr` zd(dst), zd(src_a), zn(src_b)
3398 __ Mov(zd_1, src_a);
3399 (masm.*macro)(zd_1, p0.Merging(), zd_1, src_b);
3400
3401 // `instr` zd(dst), zm(src_a), zd(src_b)
3402 // If zd aliases zm, the instruction macro (`Instr`) swaps the operand
3403 // order when the operation is commutative; otherwise it falls back to the
3404 // reversed form of the instruction, such as subr or divr.
3405 __ Mov(zd_2, src_b);
3406 (masm.*macro)(zd_2, p0.Merging(), src_a, zd_2);
3407
3408 // `instr` zd(dst), zm(src_a), zn(src_b)
3409 // The instruction macro (`Instr`) automatically selects between `instr`
3410 // and movprfx + `instr`, based on whether the zd and zn registers are
3411 // aliased. Any generated movprfx instruction is predicated, using the same
3412 // governing predicate register. To keep the merged (inactive) lanes of the
3413 // result predictable, initialise the destination register first.
3414 __ Mov(zd_3, src_a);
3415 (masm.*macro)(zd_3, p0.Merging(), src_a, src_b);
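  // For example, with three distinct registers, a predicated `Sub` expands to
  // roughly:
  //
  //   movprfx zd.T, pg/m, zn.T
  //   sub zd.T, pg/m, zd.T, zm.T
  //
  // whereas `Sub(zd, pg.Merging(), zn, zd)` maps onto the reversed form,
  // `subr zd.T, pg/m, zd.T, zn.T`.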
3416
3417 END();
3418
3419 if (CAN_RUN()) {
3420 RUN();
3421 ASSERT_EQUAL_SVE(zd_expected, zd_1);
3422
3423 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
3424 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
3425 if (!core.HasSVELane(zd_1, lane)) break;
TatWai Chongd316c5e2019-10-16 12:22:10 -07003426 if ((pg_inputs[i] & 1) != 0) {
TatWai Chong13634762019-07-16 16:20:45 -07003427 ASSERT_EQUAL_SVE_LANE(zd_expected[i], zd_1, lane);
3428 } else {
3429 ASSERT_EQUAL_SVE_LANE(zn_inputs[i], zd_1, lane);
3430 }
3431 }
3432
3433 ASSERT_EQUAL_SVE(zd_expected, zd_3);
3434 }
3435}
3436
3437TEST_SVE(sve_binary_arithmetic_predicated_add) {
3438 // clang-format off
3439 unsigned zn_b[] = {0x00, 0x01, 0x10, 0x81, 0xff, 0x0f, 0x01, 0x7f};
3440
3441 unsigned zm_b[] = {0x00, 0x01, 0x10, 0x00, 0x81, 0x80, 0xff, 0xff};
3442
3443 unsigned zn_h[] = {0x0000, 0x0123, 0x1010, 0x8181, 0xffff, 0x0f0f, 0x0101, 0x7f7f};
3444
3445 unsigned zm_h[] = {0x0000, 0x0123, 0x1010, 0x0000, 0x8181, 0x8080, 0xffff, 0xffff};
3446
3447 unsigned zn_s[] = {0x00000000, 0x01234567, 0x10101010, 0x81818181,
3448 0xffffffff, 0x0f0f0f0f, 0x01010101, 0x7f7f7f7f};
3449
3450 unsigned zm_s[] = {0x00000000, 0x01234567, 0x10101010, 0x00000000,
3451 0x81818181, 0x80808080, 0xffffffff, 0xffffffff};
3452
3453 uint64_t zn_d[] = {0x0000000000000000, 0x0123456789abcdef,
3454 0x1010101010101010, 0x8181818181818181,
3455 0xffffffffffffffff, 0x0f0f0f0f0f0f0f0f,
3456 0x0101010101010101, 0x7f7f7f7fffffffff};
3457
3458 uint64_t zm_d[] = {0x0000000000000000, 0x0123456789abcdef,
3459 0x1010101010101010, 0x0000000000000000,
3460 0x8181818181818181, 0x8080808080808080,
3461 0xffffffffffffffff, 0xffffffffffffffff};
3462
3463 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3464 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3465 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3466 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3467
3468 unsigned add_exp_b[] = {0x00, 0x02, 0x20, 0x81, 0x80, 0x8f, 0x00, 0x7f};
3469
3470 unsigned add_exp_h[] = {0x0000, 0x0246, 0x1010, 0x8181,
3471 0x8180, 0x8f8f, 0x0101, 0x7f7e};
3472
3473 unsigned add_exp_s[] = {0x00000000, 0x01234567, 0x20202020, 0x81818181,
3474 0x81818180, 0x0f0f0f0f, 0x01010100, 0x7f7f7f7e};
3475
3476 uint64_t add_exp_d[] = {0x0000000000000000, 0x02468acf13579bde,
3477 0x2020202020202020, 0x8181818181818181,
3478 0xffffffffffffffff, 0x8f8f8f8f8f8f8f8f,
3479 0x0101010101010100, 0x7f7f7f7ffffffffe};
3480
TatWai Chong7a0d3672019-10-23 17:35:18 -07003481 ArithPredicatedFn fn = &MacroAssembler::Add;
TatWai Chong13634762019-07-16 16:20:45 -07003482 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, add_exp_b);
3483 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, add_exp_h);
3484 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, add_exp_s);
3485 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, add_exp_d);
3486
3487 unsigned sub_exp_b[] = {0x00, 0x00, 0x00, 0x81, 0x7e, 0x8f, 0x02, 0x7f};
3488
3489 unsigned sub_exp_h[] = {0x0000, 0x0000, 0x1010, 0x8181,
3490 0x7e7e, 0x8e8f, 0x0101, 0x7f80};
3491
3492 unsigned sub_exp_s[] = {0x00000000, 0x01234567, 0x00000000, 0x81818181,
3493 0x7e7e7e7e, 0x0f0f0f0f, 0x01010102, 0x7f7f7f80};
3494
3495 uint64_t sub_exp_d[] = {0x0000000000000000, 0x0000000000000000,
3496 0x0000000000000000, 0x8181818181818181,
3497 0xffffffffffffffff, 0x8e8e8e8e8e8e8e8f,
3498 0x0101010101010102, 0x7f7f7f8000000000};
3499
3500 fn = &MacroAssembler::Sub;
3501 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sub_exp_b);
3502 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sub_exp_h);
3503 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sub_exp_s);
3504 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sub_exp_d);
3505 // clang-format on
3506}
3507
3508TEST_SVE(sve_binary_arithmetic_predicated_umin_umax_uabd) {
3509 // clang-format off
3510 unsigned zn_b[] = {0x00, 0xff, 0x0f, 0xff, 0xf0, 0x98, 0x55, 0x67};
3511
3512 unsigned zm_b[] = {0x01, 0x00, 0x0e, 0xfe, 0xfe, 0xab, 0xcd, 0x78};
3513
3514 unsigned zn_h[] = {0x0000, 0xffff, 0x00ff, 0xffff,
3515 0xff00, 0xba98, 0x5555, 0x4567};
3516
3517 unsigned zm_h[] = {0x0001, 0x0000, 0x00ee, 0xfffe,
3518 0xfe00, 0xabab, 0xcdcd, 0x5678};
3519
3520 unsigned zn_s[] = {0x00000000, 0xffffffff, 0x0000ffff, 0xffffffff,
3521 0xffff0000, 0xfedcba98, 0x55555555, 0x01234567};
3522
3523 unsigned zm_s[] = {0x00000001, 0x00000000, 0x0000eeee, 0xfffffffe,
3524 0xfffe0000, 0xabababab, 0xcdcdcdcd, 0x12345678};
3525
3526 uint64_t zn_d[] = {0x0000000000000000, 0xffffffffffffffff,
3527 0x5555555555555555, 0x0000000001234567};
3528
3529 uint64_t zm_d[] = {0x0000000000000001, 0x0000000000000000,
3530 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3531
3532 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3533 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3534 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3535 int pg_d[] = {1, 0, 1, 1};
3536
3537 unsigned umax_exp_b[] = {0x01, 0xff, 0x0f, 0xff, 0xfe, 0xab, 0xcd, 0x67};
3538
3539 unsigned umax_exp_h[] = {0x0001, 0xffff, 0x00ff, 0xffff,
3540 0xff00, 0xba98, 0x5555, 0x5678};
3541
3542 unsigned umax_exp_s[] = {0x00000001, 0xffffffff, 0x0000ffff, 0xffffffff,
3543 0xffff0000, 0xfedcba98, 0xcdcdcdcd, 0x12345678};
3544
3545 uint64_t umax_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3546 0xcdcdcdcdcdcdcdcd, 0x0000000012345678};
3547
TatWai Chong7a0d3672019-10-23 17:35:18 -07003548 ArithPredicatedFn fn = &MacroAssembler::Umax;
TatWai Chong13634762019-07-16 16:20:45 -07003549 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umax_exp_b);
3550 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umax_exp_h);
3551 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umax_exp_s);
3552 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umax_exp_d);
3553
3554 unsigned umin_exp_b[] = {0x00, 0x00, 0x0e, 0xff, 0xf0, 0x98, 0x55, 0x67};
3555
3556 unsigned umin_exp_h[] = {0x0000, 0x0000, 0x00ff, 0xfffe,
3557 0xfe00, 0xabab, 0x5555, 0x4567};
3558
3559 unsigned umin_exp_s[] = {0x00000000, 0xffffffff, 0x0000eeee, 0xfffffffe,
3560 0xfffe0000, 0xfedcba98, 0x55555555, 0x01234567};
3561
3562 uint64_t umin_exp_d[] = {0x0000000000000000, 0xffffffffffffffff,
3563 0x5555555555555555, 0x0000000001234567};
3564 fn = &MacroAssembler::Umin;
3565 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umin_exp_b);
3566 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umin_exp_h);
3567 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umin_exp_s);
3568 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umin_exp_d);
3569
3570 unsigned uabd_exp_b[] = {0x01, 0xff, 0x01, 0xff, 0x0e, 0x13, 0x78, 0x67};
3571
3572 unsigned uabd_exp_h[] = {0x0001, 0xffff, 0x00ff, 0x0001,
3573 0x0100, 0x0eed, 0x5555, 0x1111};
3574
3575 unsigned uabd_exp_s[] = {0x00000001, 0xffffffff, 0x00001111, 0x00000001,
3576 0x00010000, 0xfedcba98, 0x78787878, 0x11111111};
3577
3578 uint64_t uabd_exp_d[] = {0x0000000000000001, 0xffffffffffffffff,
3579 0x7878787878787878, 0x0000000011111111};
3580
3581 fn = &MacroAssembler::Uabd;
3582 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, uabd_exp_b);
3583 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, uabd_exp_h);
3584 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, uabd_exp_s);
3585 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, uabd_exp_d);
3586 // clang-format on
3587}
3588
3589TEST_SVE(sve_binary_arithmetic_predicated_smin_smax_sabd) {
3590 // clang-format off
3591 int zn_b[] = {0, -128, -128, -128, -128, 127, 127, 1};
3592
3593 int zm_b[] = {-1, 0, -1, -127, 127, 126, -1, 0};
3594
3595 int zn_h[] = {0, INT16_MIN, INT16_MIN, INT16_MIN,
3596 INT16_MIN, INT16_MAX, INT16_MAX, 1};
3597
3598 int zm_h[] = {-1, 0, -1, INT16_MIN + 1,
3599 INT16_MAX, INT16_MAX - 1, -1, 0};
3600
3601 int zn_s[] = {0, INT32_MIN, INT32_MIN, INT32_MIN,
3602 INT32_MIN, INT32_MAX, INT32_MAX, 1};
3603
3604 int zm_s[] = {-1, 0, -1, -INT32_MAX,
3605 INT32_MAX, INT32_MAX - 1, -1, 0};
3606
3607 int64_t zn_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3608 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3609
3610 int64_t zm_d[] = {-1, 0, -1, INT64_MIN + 1,
3611 INT64_MAX, INT64_MAX - 1, -1, 0};
3612
3613 int pg_b[] = {1, 1, 1, 0, 1, 1, 1, 0};
3614 int pg_h[] = {1, 1, 0, 1, 1, 1, 0, 1};
3615 int pg_s[] = {1, 0, 1, 1, 1, 0, 1, 1};
3616 int pg_d[] = {0, 1, 1, 1, 0, 1, 1, 1};
3617
3618 int smax_exp_b[] = {0, 0, -1, -128, 127, 127, 127, 1};
3619
3620 int smax_exp_h[] = {0, 0, INT16_MIN, INT16_MIN + 1,
3621 INT16_MAX, INT16_MAX, INT16_MAX, 1};
3622
3623 int smax_exp_s[] = {0, INT32_MIN, -1, INT32_MIN + 1,
3624 INT32_MAX, INT32_MAX, INT32_MAX, 1};
3625
3626 int64_t smax_exp_d[] = {0, 0, -1, INT64_MIN + 1,
3627 INT64_MIN, INT64_MAX, INT64_MAX, 1};
3628
TatWai Chong7a0d3672019-10-23 17:35:18 -07003629 ArithPredicatedFn fn = &MacroAssembler::Smax;
TatWai Chong13634762019-07-16 16:20:45 -07003630 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smax_exp_b);
3631 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smax_exp_h);
3632 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smax_exp_s);
3633 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smax_exp_d);
3634
3635 int smin_exp_b[] = {-1, -128, -128, -128, -128, 126, -1, 1};
3636
3637 int smin_exp_h[] = {-1, INT16_MIN, INT16_MIN, INT16_MIN,
3638 INT16_MIN, INT16_MAX - 1, INT16_MAX, 0};
3639
3640 int smin_exp_s[] = {-1, INT32_MIN, INT32_MIN, INT32_MIN,
3641 INT32_MIN, INT32_MAX, -1, 0};
3642
3643 int64_t smin_exp_d[] = {0, INT64_MIN, INT64_MIN, INT64_MIN,
3644 INT64_MIN, INT64_MAX - 1, -1, 0};
3645
3646 fn = &MacroAssembler::Smin;
3647 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, smin_exp_b);
3648 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, smin_exp_h);
3649 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, smin_exp_s);
3650 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, smin_exp_d);
3651
3652 unsigned sabd_exp_b[] = {1, 128, 127, 128, 255, 1, 128, 1};
3653
3654 unsigned sabd_exp_h[] = {1, 0x8000, 0x8000, 1, 0xffff, 1, 0x7fff, 1};
3655
3656 unsigned sabd_exp_s[] = {1, 0x80000000, 0x7fffffff, 1,
3657 0xffffffff, 0x7fffffff, 0x80000000, 1};
3658
3659 uint64_t sabd_exp_d[] = {0, 0x8000000000000000, 0x7fffffffffffffff, 1,
3660 0x8000000000000000, 1, 0x8000000000000000, 1};
3661
3662 fn = &MacroAssembler::Sabd;
3663 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, sabd_exp_b);
3664 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, sabd_exp_h);
3665 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, sabd_exp_s);
3666 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, sabd_exp_d);
3667 // clang-format on
3668}
3669
3670TEST_SVE(sve_binary_arithmetic_predicated_mul_umulh) {
3671 // clang-format off
3672 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3673
3674 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3675
3676 unsigned zn_h[] = {0x0000, 0x0001, 0x0020, 0x0800,
3677 0x8000, 0xff00, 0x5555, 0xaaaa};
3678
3679 unsigned zm_h[] = {0x007f, 0x00cd, 0x0800, 0xffff,
3680 0x5555, 0xaaaa, 0x0001, 0x1234};
3681
3682 unsigned zn_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3683 0x12345678, 0xffffffff, 0x55555555, 0xaaaaaaaa};
3684
3685 unsigned zm_s[] = {0x00000000, 0x00000001, 0x00200020, 0x08000800,
3686 0x12345678, 0x22223333, 0x55556666, 0x77778888};
3687
3688 uint64_t zn_d[] = {0x0000000000000000, 0x5555555555555555,
3689 0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa};
3690
3691 uint64_t zm_d[] = {0x0000000000000000, 0x1111111133333333,
3692 0xddddddddeeeeeeee, 0xaaaaaaaaaaaaaaaa};
3693
3694 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3695 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3696 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3697 int pg_d[] = {1, 1, 0, 1};
3698
3699 unsigned mul_exp_b[] = {0x00, 0xcd, 0x00, 0xf8, 0x80, 0x56, 0x00, 0x50};
3700
3701 unsigned mul_exp_h[] = {0x0000, 0x0001, 0x0000, 0xf800,
3702 0x8000, 0xff00, 0x5555, 0x9e88};
3703
3704 unsigned mul_exp_s[] = {0x00000000, 0x00000001, 0x00200020, 0x00400000,
3705 0x1df4d840, 0xddddcccd, 0x55555555, 0xb05afa50};
3706
3707 uint64_t mul_exp_d[] = {0x0000000000000000, 0xa4fa4fa4eeeeeeef,
3708 0xffffffffffffffff, 0x38e38e38e38e38e4};
3709
TatWai Chong7a0d3672019-10-23 17:35:18 -07003710 ArithPredicatedFn fn = &MacroAssembler::Mul;
TatWai Chong13634762019-07-16 16:20:45 -07003711 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, mul_exp_b);
3712 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, mul_exp_h);
3713 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, mul_exp_s);
3714 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, mul_exp_d);
3715
3716 unsigned umulh_exp_b[] = {0x00, 0x00, 0x10, 0x07, 0x80, 0xa9, 0x00, 0x05};
3717
3718 unsigned umulh_exp_h[] = {0x0000, 0x0001, 0x0001, 0x07ff,
3719 0x2aaa, 0xff00, 0x0000, 0x0c22};
3720
3721 unsigned umulh_exp_s[] = {0x00000000, 0x00000000, 0x00200020, 0x00400080,
3722 0x014b66dc, 0x22223332, 0x55555555, 0x4fa505af};
3723
3724 uint64_t umulh_exp_d[] = {0x0000000000000000, 0x05b05b05bbbbbbbb,
3725 0xffffffffffffffff, 0x71c71c71c71c71c6};
3726
3727 fn = &MacroAssembler::Umulh;
3728 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, umulh_exp_b);
3729 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, umulh_exp_h);
3730 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, umulh_exp_s);
3731 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, umulh_exp_d);
3732 // clang-format on
3733}
3734
3735TEST_SVE(sve_binary_arithmetic_predicated_smulh) {
3736 // clang-format off
3737 int zn_b[] = {0, 1, -1, INT8_MIN, INT8_MAX, -1, 100, -3};
3738
3739 int zm_b[] = {0, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -1, 2, 66};
3740
3741 int zn_h[] = {0, 1, -1, INT16_MIN, INT16_MAX, -1, 10000, -3};
3742
3743 int zm_h[] = {0, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, -1, 2, 6666};
3744
3745 int zn_s[] = {0, 1, -1, INT32_MIN, INT32_MAX, -1, 100000000, -3};
3746
3747 int zm_s[] = {0, INT32_MIN, INT32_MIN, INT32_MAX, INT32_MAX, -1, 2, 66666666};
3748
3749 int64_t zn_d[] = {0, -1, INT64_MIN, INT64_MAX};
3750
3751 int64_t zm_d[] = {INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX};
3752
3753 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3754 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3755 int pg_s[] = {1, 1, 0, 1, 1, 1, 0, 1};
3756 int pg_d[] = {1, 1, 0, 1};
3757
3758 int exp_b[] = {0, -1, 0, -64, INT8_MAX, 0, 0, -1};
3759
3760 int exp_h[] = {0, 1, 0, -16384, 16383, -1, 0, -1};
3761
3762 int exp_s[] = {0, -1, -1, -1073741824, 1073741823, 0, 100000000, -1};
3763
3764 int64_t exp_d[] = {0, -1, INT64_MIN, 4611686018427387903};
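  // Smulh keeps the high half of the doubled-width product: for example,
  // INT64_MAX * INT64_MAX = 2^126 - 2^64 + 1, whose top 64 bits are
  // 2^62 - 1 = 4611686018427387903 (the last entry of exp_d above).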
3765
TatWai Chong7a0d3672019-10-23 17:35:18 -07003766 ArithPredicatedFn fn = &MacroAssembler::Smulh;
TatWai Chong13634762019-07-16 16:20:45 -07003767 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, exp_b);
3768 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, exp_h);
3769 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3770 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3771 // clang-format on
3772}
3773
3774TEST_SVE(sve_binary_arithmetic_predicated_logical) {
3775 // clang-format off
3776 unsigned zn_b[] = {0x00, 0x01, 0x20, 0x08, 0x80, 0xff, 0x55, 0xaa};
3777 unsigned zm_b[] = {0x7f, 0xcd, 0x80, 0xff, 0x55, 0xaa, 0x00, 0x08};
3778
3779 unsigned zn_h[] = {0x0000, 0x0001, 0x2020, 0x0008,
3780 0x8000, 0xffff, 0x5555, 0xaaaa};
3781 unsigned zm_h[] = {0x7fff, 0xabcd, 0x8000, 0xffff,
3782 0x5555, 0xaaaa, 0x0000, 0x0800};
3783
3784 unsigned zn_s[] = {0x00000001, 0x20200008, 0x8000ffff, 0x5555aaaa};
3785 unsigned zm_s[] = {0x7fffabcd, 0x8000ffff, 0x5555aaaa, 0x00000800};
3786
3787 uint64_t zn_d[] = {0xfedcba9876543210, 0x0123456789abcdef,
3788 0x0001200880ff55aa, 0x0022446688aaccee};
3789 uint64_t zm_d[] = {0xffffeeeeddddcccc, 0xccccddddeeeeffff,
3790 0x7fcd80ff55aa0008, 0x1133557799bbddff};
3791
3792 int pg_b[] = {0, 1, 1, 1, 0, 1, 1, 1};
3793 int pg_h[] = {1, 0, 1, 1, 1, 0, 1, 1};
3794 int pg_s[] = {1, 1, 1, 0};
3795 int pg_d[] = {1, 1, 0, 1};
3796
3797 unsigned and_exp_b[] = {0x00, 0x01, 0x00, 0x08, 0x80, 0xaa, 0x00, 0x08};
3798
3799 unsigned and_exp_h[] = {0x0000, 0x0001, 0x0000, 0x0008,
3800 0x0000, 0xffff, 0x0000, 0x0800};
3801
3802 unsigned and_exp_s[] = {0x00000001, 0x00000008, 0x0000aaaa, 0x5555aaaa};
3803
3804 uint64_t and_exp_d[] = {0xfedcaa8854540000, 0x0000454588aacdef,
3805 0x0001200880ff55aa, 0x0022446688aaccee};
3806
TatWai Chong7a0d3672019-10-23 17:35:18 -07003807 ArithPredicatedFn fn = &MacroAssembler::And;
TatWai Chong13634762019-07-16 16:20:45 -07003808 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, and_exp_b);
3809 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, and_exp_h);
3810 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, and_exp_s);
3811 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, and_exp_d);
3812
3813 unsigned bic_exp_b[] = {0x00, 0x00, 0x20, 0x00, 0x80, 0x55, 0x55, 0xa2};
3814
3815 unsigned bic_exp_h[] = {0x0000, 0x0001, 0x2020, 0x0000,
3816 0x8000, 0xffff, 0x5555, 0xa2aa};
3817
3818 unsigned bic_exp_s[] = {0x00000000, 0x20200000, 0x80005555, 0x5555aaaa};
3819
3820 uint64_t bic_exp_d[] = {0x0000101022003210, 0x0123002201010000,
3821 0x0001200880ff55aa, 0x0000000000000000};
3822
3823 fn = &MacroAssembler::Bic;
3824 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, bic_exp_b);
3825 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, bic_exp_h);
3826 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, bic_exp_s);
3827 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, bic_exp_d);
3828
3829 unsigned eor_exp_b[] = {0x00, 0xcc, 0xa0, 0xf7, 0x80, 0x55, 0x55, 0xa2};
3830
3831 unsigned eor_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xfff7,
3832 0xd555, 0xffff, 0x5555, 0xa2aa};
3833
3834 unsigned eor_exp_s[] = {0x7fffabcc, 0xa020fff7, 0xd5555555, 0x5555aaaa};
3835
3836 uint64_t eor_exp_d[] = {0x01235476ab89fedc, 0xcdef98ba67453210,
3837 0x0001200880ff55aa, 0x1111111111111111};
3838
3839 fn = &MacroAssembler::Eor;
3840 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, eor_exp_b);
3841 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, eor_exp_h);
3842 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, eor_exp_s);
3843 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, eor_exp_d);
3844
3845 unsigned orr_exp_b[] = {0x00, 0xcd, 0xa0, 0xff, 0x80, 0xff, 0x55, 0xaa};
3846
3847 unsigned orr_exp_h[] = {0x7fff, 0x0001, 0xa020, 0xffff,
3848 0xd555, 0xffff, 0x5555, 0xaaaa};
3849
3850 unsigned orr_exp_s[] = {0x7fffabcd, 0xa020ffff, 0xd555ffff, 0x5555aaaa};
3851
3852 uint64_t orr_exp_d[] = {0xfffffefeffddfedc, 0xcdefddffefefffff,
3853 0x0001200880ff55aa, 0x1133557799bbddff};
3854
3855 fn = &MacroAssembler::Orr;
3856 IntBinArithHelper(config, fn, kBRegSize, pg_b, zn_b, zm_b, orr_exp_b);
3857 IntBinArithHelper(config, fn, kHRegSize, pg_h, zn_h, zm_h, orr_exp_h);
3858 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, orr_exp_s);
3859 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, orr_exp_d);
3860 // clang-format on
3861}
3862
3863TEST_SVE(sve_binary_arithmetic_predicated_sdiv) {
3864 // clang-format off
3865 int zn_s[] = {0, 1, -1, 2468,
3866 INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX,
3867 -11111111, 87654321, 0, 0};
3868
3869 int zm_s[] = {1, -1, 1, 1234,
3870 -1, INT32_MIN, 1, -1,
3871 22222222, 80000000, -1, 0};
3872
3873 int64_t zn_d[] = {0, 1, -1, 2468,
3874 INT64_MIN, INT64_MAX, INT64_MIN, INT64_MAX,
3875 -11111111, 87654321, 0, 0};
3876
3877 int64_t zm_d[] = {1, -1, 1, 1234,
3878 -1, INT64_MIN, 1, -1,
3879 22222222, 80000000, -1, 0};
3880
3881 int pg_s[] = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0};
3882 int pg_d[] = {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1};
3883
3884 int exp_s[] = {0, 1, -1, 2,
3885 INT32_MIN, 0, INT32_MIN, -INT32_MAX,
3886 0, 1, 0, 0};
3887
3888 int64_t exp_d[] = {0, -1, -1, 2,
3889 INT64_MIN, INT64_MAX, INT64_MIN, -INT64_MAX,
3890 0, 1, 0, 0};
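  // Note that SVE integer division defines x / 0 = 0, and that INT_MIN / -1
  // wraps to INT_MIN rather than trapping; the expected values above rely on
  // both behaviours.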
3891
TatWai Chong7a0d3672019-10-23 17:35:18 -07003892 ArithPredicatedFn fn = &MacroAssembler::Sdiv;
TatWai Chong13634762019-07-16 16:20:45 -07003893 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3894 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3895 // clang-format on
3896}
3897
3898TEST_SVE(sve_binary_arithmetic_predicated_udiv) {
3899 // clang-format off
3900 unsigned zn_s[] = {0x00000000, 0x00000001, 0xffffffff, 0x80000000,
3901 0xffffffff, 0x80000000, 0xffffffff, 0x0000f000};
3902
3903 unsigned zm_s[] = {0x00000001, 0xffffffff, 0x80000000, 0x00000002,
3904 0x00000000, 0x00000001, 0x00008000, 0xf0000000};
3905
3906 uint64_t zn_d[] = {0x0000000000000000, 0x0000000000000001,
3907 0xffffffffffffffff, 0x8000000000000000,
3908 0xffffffffffffffff, 0x8000000000000000,
3909 0xffffffffffffffff, 0xf0000000f0000000};
3910
3911 uint64_t zm_d[] = {0x0000000000000001, 0xffffffff00000000,
3912 0x8000000000000000, 0x0000000000000002,
3913 0x8888888888888888, 0x0000000000000001,
3914 0x0000000080000000, 0x00000000f0000000};
3915
3916 int pg_s[] = {1, 1, 0, 1, 1, 0, 1, 1};
3917 int pg_d[] = {1, 0, 1, 1, 1, 1, 0, 1};
3918
3919 unsigned exp_s[] = {0x00000000, 0x00000000, 0xffffffff, 0x40000000,
3920 0x00000000, 0x80000000, 0x0001ffff, 0x00000000};
3921
3922 uint64_t exp_d[] = {0x0000000000000000, 0x0000000000000001,
3923 0x0000000000000001, 0x4000000000000000,
3924 0x0000000000000001, 0x8000000000000000,
3925 0xffffffffffffffff, 0x0000000100000001};
3926
TatWai Chong7a0d3672019-10-23 17:35:18 -07003927 ArithPredicatedFn fn = &MacroAssembler::Udiv;
TatWai Chong13634762019-07-16 16:20:45 -07003928 IntBinArithHelper(config, fn, kSRegSize, pg_s, zn_s, zm_s, exp_s);
3929 IntBinArithHelper(config, fn, kDRegSize, pg_d, zn_d, zm_d, exp_d);
3930 // clang-format on
3931}
3932
TatWai Chong7a0d3672019-10-23 17:35:18 -07003933typedef void (MacroAssembler::*ArithFn)(const ZRegister& zd,
3934 const ZRegister& zn,
3935 const ZRegister& zm);
3936
3937template <typename T>
3938static void IntArithHelper(Test* config,
TatWai Chong7a0d3672019-10-23 17:35:18 -07003939 ArithFn macro,
TatWai Chong845246b2019-08-08 00:01:58 -07003940 unsigned lane_size_in_bits,
3941 const T& zn_inputs,
3942 const T& zm_inputs,
3943 const T& zd_expected) {
3944 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
3945 START();
3946
3947 ZRegister zn = z31.WithLaneSize(lane_size_in_bits);
3948 ZRegister zm = z27.WithLaneSize(lane_size_in_bits);
3949 InsrHelper(&masm, zn, zn_inputs);
3950 InsrHelper(&masm, zm, zm_inputs);
3951
3952 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
3953 (masm.*macro)(zd, zn, zm);
3954
3955 END();
3956
3957 if (CAN_RUN()) {
3958 RUN();
3959 ASSERT_EQUAL_SVE(zd_expected, zd);
3960 }
3961}
3962
3963TEST_SVE(sve_arithmetic_unpredicated_add_sqadd_uqadd) {
3964 // clang-format off
TatWai Chong6995bfd2019-09-26 10:48:05 +01003965 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xaa, 0x55, 0xff, 0xf0};
3966 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa, 0x5555, 0xffff, 0xf0f0};
3967 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0x10001010, 0xaaaaaaaa, 0xf000f0f0};
3968 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
TatWai Chong845246b2019-08-08 00:01:58 -07003969 0x1000000010001010, 0xf0000000f000f0f0};
3970
TatWai Chong7a0d3672019-10-23 17:35:18 -07003971 ArithFn fn = &MacroAssembler::Add;
TatWai Chong845246b2019-08-08 00:01:58 -07003972
3973 unsigned add_exp_b[] = {0x02, 0xfe, 0x20, 0x54, 0xaa, 0xfe, 0xe0};
3974 unsigned add_exp_h[] = {0x0302, 0xfefe, 0x2020, 0x5554, 0xaaaa, 0xfffe, 0xe1e0};
3975 unsigned add_exp_s[] = {0x00030302, 0xfffefefe, 0x20002020, 0x55555554, 0xe001e1e0};
3976 uint64_t add_exp_d[] = {0x0000000300030302, 0xfffffffefffefefe,
3977 0x2000000020002020, 0xe0000001e001e1e0};
3978
TatWai Chong6995bfd2019-09-26 10:48:05 +01003979 IntArithHelper(config, fn, kBRegSize, in_b, in_b, add_exp_b);
3980 IntArithHelper(config, fn, kHRegSize, in_h, in_h, add_exp_h);
3981 IntArithHelper(config, fn, kSRegSize, in_s, in_s, add_exp_s);
3982 IntArithHelper(config, fn, kDRegSize, in_d, in_d, add_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003983
3984 fn = &MacroAssembler::Sqadd;
3985
3986 unsigned sqadd_exp_b[] = {0x80, 0x7f, 0x20, 0x80, 0x7f, 0xfe, 0xe0};
3987 unsigned sqadd_exp_h[] = {0x8000, 0x7fff, 0x2020, 0x8000, 0x7fff, 0xfffe, 0xe1e0};
3988 unsigned sqadd_exp_s[] = {0x80000000, 0x7fffffff, 0x20002020, 0x80000000, 0xe001e1e0};
3989 uint64_t sqadd_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
3990 0x2000000020002020, 0xe0000001e001e1e0};
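  // Sqadd saturates to the signed range of the lane: in B lanes, for example,
  // 0x81 + 0x81 is -127 + -127 = -254, which saturates to -128 (0x80).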
3991
TatWai Chong6995bfd2019-09-26 10:48:05 +01003992 IntArithHelper(config, fn, kBRegSize, in_b, in_b, sqadd_exp_b);
3993 IntArithHelper(config, fn, kHRegSize, in_h, in_h, sqadd_exp_h);
3994 IntArithHelper(config, fn, kSRegSize, in_s, in_s, sqadd_exp_s);
3995 IntArithHelper(config, fn, kDRegSize, in_d, in_d, sqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07003996
3997 fn = &MacroAssembler::Uqadd;
3998
3999 unsigned uqadd_exp_b[] = {0xff, 0xfe, 0x20, 0xff, 0xaa, 0xff, 0xff};
4000 unsigned uqadd_exp_h[] = {0xffff, 0xfefe, 0x2020, 0xffff, 0xaaaa, 0xffff, 0xffff};
4001 unsigned uqadd_exp_s[] = {0xffffffff, 0xfffefefe, 0x20002020, 0xffffffff, 0xffffffff};
4002 uint64_t uqadd_exp_d[] = {0xffffffffffffffff, 0xfffffffefffefefe,
4003 0x2000000020002020, 0xffffffffffffffff};
4004
TatWai Chong6995bfd2019-09-26 10:48:05 +01004005 IntArithHelper(config, fn, kBRegSize, in_b, in_b, uqadd_exp_b);
4006 IntArithHelper(config, fn, kHRegSize, in_h, in_h, uqadd_exp_h);
4007 IntArithHelper(config, fn, kSRegSize, in_s, in_s, uqadd_exp_s);
4008 IntArithHelper(config, fn, kDRegSize, in_d, in_d, uqadd_exp_d);
TatWai Chong845246b2019-08-08 00:01:58 -07004009 // clang-format on
4010}
4011
4012TEST_SVE(sve_arithmetic_unpredicated_sub_sqsub_uqsub) {
4013 // clang-format off
4014
4015 unsigned ins1_b[] = {0x81, 0x7f, 0x7e, 0xaa};
4016 unsigned ins2_b[] = {0x10, 0xf0, 0xf0, 0x55};
4017
4018 unsigned ins1_h[] = {0x8181, 0x7f7f, 0x7e7e, 0xaaaa};
4019 unsigned ins2_h[] = {0x1010, 0xf0f0, 0xf0f0, 0x5555};
4020
4021 unsigned ins1_s[] = {0x80018181, 0x7fff7f7f, 0x7eee7e7e, 0xaaaaaaaa};
4022 unsigned ins2_s[] = {0x10001010, 0xf000f0f0, 0xf000f0f0, 0x55555555};
4023
4024 uint64_t ins1_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f,
4025 0x7eeeeeee7eee7e7e, 0xaaaaaaaaaaaaaaaa};
4026 uint64_t ins2_d[] = {0x1000000010001010, 0xf0000000f000f0f0,
4027 0xf0000000f000f0f0, 0x5555555555555555};
4028
TatWai Chong7a0d3672019-10-23 17:35:18 -07004029 ArithFn fn = &MacroAssembler::Sub;
4030
4031 unsigned ins1_sub_ins2_exp_b[] = {0x71, 0x8f, 0x8e, 0x55};
4032 unsigned ins1_sub_ins2_exp_h[] = {0x7171, 0x8e8f, 0x8d8e, 0x5555};
4033 unsigned ins1_sub_ins2_exp_s[] = {0x70017171, 0x8ffe8e8f, 0x8eed8d8e, 0x55555555};
4034 uint64_t ins1_sub_ins2_exp_d[] = {0x7000000170017171, 0x8ffffffe8ffe8e8f,
4035 0x8eeeeeed8eed8d8e, 0x5555555555555555};
4036
4037 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sub_ins2_exp_b);
4038 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sub_ins2_exp_h);
4039 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sub_ins2_exp_s);
4040 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sub_ins2_exp_d);
4041
4042 unsigned ins2_sub_ins1_exp_b[] = {0x8f, 0x71, 0x72, 0xab};
4043 unsigned ins2_sub_ins1_exp_h[] = {0x8e8f, 0x7171, 0x7272, 0xaaab};
4044 unsigned ins2_sub_ins1_exp_s[] = {0x8ffe8e8f, 0x70017171, 0x71127272, 0xaaaaaaab};
4045 uint64_t ins2_sub_ins1_exp_d[] = {0x8ffffffe8ffe8e8f, 0x7000000170017171,
4046 0x7111111271127272, 0xaaaaaaaaaaaaaaab};
4047
4048 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sub_ins1_exp_b);
4049 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sub_ins1_exp_h);
4050 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sub_ins1_exp_s);
4051 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sub_ins1_exp_d);
4052
4053 fn = &MacroAssembler::Sqsub;
4054
4055 unsigned ins1_sqsub_ins2_exp_b[] = {0x80, 0x7f, 0x7f, 0x80};
4056 unsigned ins1_sqsub_ins2_exp_h[] = {0x8000, 0x7fff, 0x7fff, 0x8000};
4057 unsigned ins1_sqsub_ins2_exp_s[] = {0x80000000, 0x7fffffff, 0x7fffffff, 0x80000000};
4058 uint64_t ins1_sqsub_ins2_exp_d[] = {0x8000000000000000, 0x7fffffffffffffff,
4059 0x7fffffffffffffff, 0x8000000000000000};
4060
4061 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_sqsub_ins2_exp_b);
4062 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_sqsub_ins2_exp_h);
4063 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_sqsub_ins2_exp_s);
4064 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_sqsub_ins2_exp_d);
4065
4066 unsigned ins2_sqsub_ins1_exp_b[] = {0x7f, 0x80, 0x80, 0x7f};
4067 unsigned ins2_sqsub_ins1_exp_h[] = {0x7fff, 0x8000, 0x8000, 0x7fff};
4068 unsigned ins2_sqsub_ins1_exp_s[] = {0x7fffffff, 0x80000000, 0x80000000, 0x7fffffff};
4069 uint64_t ins2_sqsub_ins1_exp_d[] = {0x7fffffffffffffff, 0x8000000000000000,
4070 0x8000000000000000, 0x7fffffffffffffff};
4071
4072 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_sqsub_ins1_exp_b);
4073 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_sqsub_ins1_exp_h);
4074 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_sqsub_ins1_exp_s);
4075 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_sqsub_ins1_exp_d);
4076
4077 fn = &MacroAssembler::Uqsub;
4078
4079 unsigned ins1_uqsub_ins2_exp_b[] = {0x71, 0x00, 0x00, 0x55};
4080 unsigned ins1_uqsub_ins2_exp_h[] = {0x7171, 0x0000, 0x0000, 0x5555};
4081 unsigned ins1_uqsub_ins2_exp_s[] = {0x70017171, 0x00000000, 0x00000000, 0x55555555};
4082 uint64_t ins1_uqsub_ins2_exp_d[] = {0x7000000170017171, 0x0000000000000000,
4083 0x0000000000000000, 0x5555555555555555};
4084
4085 IntArithHelper(config, fn, kBRegSize, ins1_b, ins2_b, ins1_uqsub_ins2_exp_b);
4086 IntArithHelper(config, fn, kHRegSize, ins1_h, ins2_h, ins1_uqsub_ins2_exp_h);
4087 IntArithHelper(config, fn, kSRegSize, ins1_s, ins2_s, ins1_uqsub_ins2_exp_s);
4088 IntArithHelper(config, fn, kDRegSize, ins1_d, ins2_d, ins1_uqsub_ins2_exp_d);
4089
4090 unsigned ins2_uqsub_ins1_exp_b[] = {0x00, 0x71, 0x72, 0x00};
4091 unsigned ins2_uqsub_ins1_exp_h[] = {0x0000, 0x7171, 0x7272, 0x0000};
4092 unsigned ins2_uqsub_ins1_exp_s[] = {0x00000000, 0x70017171, 0x71127272, 0x00000000};
4093 uint64_t ins2_uqsub_ins1_exp_d[] = {0x0000000000000000, 0x7000000170017171,
4094 0x7111111271127272, 0x0000000000000000};
4095
4096 IntArithHelper(config, fn, kBRegSize, ins2_b, ins1_b, ins2_uqsub_ins1_exp_b);
4097 IntArithHelper(config, fn, kHRegSize, ins2_h, ins1_h, ins2_uqsub_ins1_exp_h);
4098 IntArithHelper(config, fn, kSRegSize, ins2_s, ins1_s, ins2_uqsub_ins1_exp_s);
4099 IntArithHelper(config, fn, kDRegSize, ins2_d, ins1_d, ins2_uqsub_ins1_exp_d);
4100 // clang-format on
4101}
4102
Jacob Bramley9e5da2a2019-08-06 18:52:07 +01004103TEST_SVE(sve_rdvl) {
4104 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4105 START();
4106
4107 // Encodable multipliers.
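  // (`rdvl` takes a signed six-bit multiplier, so [-32, 31] is the encodable
  // range.)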
4108 __ Rdvl(x0, 0);
4109 __ Rdvl(x1, 1);
4110 __ Rdvl(x2, 2);
4111 __ Rdvl(x3, 31);
4112 __ Rdvl(x4, -1);
4113 __ Rdvl(x5, -2);
4114 __ Rdvl(x6, -32);
4115
4116 // For unencodable multipliers, the MacroAssembler uses a sequence of
4117 // instructions.
4118 __ Rdvl(x10, 32);
4119 __ Rdvl(x11, -33);
4120 __ Rdvl(x12, 42);
4121 __ Rdvl(x13, -42);
4122
4123 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4124 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4125 // occurs in the macro.
4126 __ Rdvl(x14, 0x007fffffffffffff);
4127 __ Rdvl(x15, -0x0080000000000000);
4128
4129 END();
4130
4131 if (CAN_RUN()) {
4132 RUN();
4133
4134 uint64_t vl = config->sve_vl_in_bytes();
4135
4136 ASSERT_EQUAL_64(vl * 0, x0);
4137 ASSERT_EQUAL_64(vl * 1, x1);
4138 ASSERT_EQUAL_64(vl * 2, x2);
4139 ASSERT_EQUAL_64(vl * 31, x3);
4140 ASSERT_EQUAL_64(vl * -1, x4);
4141 ASSERT_EQUAL_64(vl * -2, x5);
4142 ASSERT_EQUAL_64(vl * -32, x6);
4143
4144 ASSERT_EQUAL_64(vl * 32, x10);
4145 ASSERT_EQUAL_64(vl * -33, x11);
4146 ASSERT_EQUAL_64(vl * 42, x12);
4147 ASSERT_EQUAL_64(vl * -42, x13);
4148
4149 ASSERT_EQUAL_64(vl * 0x007fffffffffffff, x14);
4150 ASSERT_EQUAL_64(vl * 0xff80000000000000, x15);
4151 }
4152}
4153
4154TEST_SVE(sve_rdpl) {
4155 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4156 START();
4157
4158 // There is no `rdpl` instruction, so the MacroAssembler maps `Rdpl` onto
4159 // Addpl(xd, xzr, ...).
4160
4161 // Encodable multipliers (as `addvl`).
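  // (A PL is an eighth of a VL, so a multiple of eight PLs maps directly onto
  // `addvl` with the multiplier divided by eight.)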
4162 __ Rdpl(x0, 0);
4163 __ Rdpl(x1, 8);
4164 __ Rdpl(x2, 248);
4165 __ Rdpl(x3, -8);
4166 __ Rdpl(x4, -256);
4167
4168 // Encodable multipliers (as `movz` + `addpl`).
4169 __ Rdpl(x7, 31);
Jacob Bramley889984c2019-10-28 17:28:48 +00004170 __ Rdpl(x8, -31);
4171
4172 // For unencodable multipliers, the MacroAssembler uses a sequence of
4173 // instructions.
4174 __ Rdpl(x10, 42);
4175 __ Rdpl(x11, -42);
4176
4177 // The maximum value of VL is 256 (bytes), so the multiplier is limited to the
4178 // range [INT64_MIN/256, INT64_MAX/256], to ensure that no signed overflow
4179 // occurs in the macro.
4180 __ Rdpl(x12, 0x007fffffffffffff);
4181 __ Rdpl(x13, -0x0080000000000000);
4182
4183 END();
4184
4185 if (CAN_RUN()) {
4186 RUN();
4187
4188 uint64_t vl = config->sve_vl_in_bytes();
4189 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4190 uint64_t pl = vl / kZRegBitsPerPRegBit;
4191
4192 ASSERT_EQUAL_64(pl * 0, x0);
4193 ASSERT_EQUAL_64(pl * 8, x1);
4194 ASSERT_EQUAL_64(pl * 248, x2);
4195 ASSERT_EQUAL_64(pl * -8, x3);
4196 ASSERT_EQUAL_64(pl * -256, x4);
4197
4198 ASSERT_EQUAL_64(pl * 31, x7);
Jacob Bramley889984c2019-10-28 17:28:48 +00004199 ASSERT_EQUAL_64(pl * -31, x8);
4200
4201 ASSERT_EQUAL_64(pl * 42, x10);
4202 ASSERT_EQUAL_64(pl * -42, x11);
4203
4204 ASSERT_EQUAL_64(pl * 0x007fffffffffffff, x12);
4205 ASSERT_EQUAL_64(pl * 0xff80000000000000, x13);
4206 }
4207}
4208
4209TEST_SVE(sve_addvl) {
4210 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4211 START();
4212
4213 uint64_t base = 0x1234567800000000;
4214 __ Mov(x30, base);
4215
4216 // Encodable multipliers.
4217 __ Addvl(x0, x30, 0);
4218 __ Addvl(x1, x30, 1);
4219 __ Addvl(x2, x30, 31);
4220 __ Addvl(x3, x30, -1);
4221 __ Addvl(x4, x30, -32);
4222
4223 // For unencodable multipliers, the MacroAssembler uses `Rdvl` and `Add`.
4224 __ Addvl(x5, x30, 32);
4225 __ Addvl(x6, x30, -33);
4226
4227 // Test the limits of the multiplier supported by the `Rdvl` macro.
4228 __ Addvl(x7, x30, 0x007fffffffffffff);
4229 __ Addvl(x8, x30, -0x0080000000000000);
4230
4231 // Check that xzr behaves correctly.
4232 __ Addvl(x9, xzr, 8);
4233 __ Addvl(x10, xzr, 42);
4234
4235 // Check that sp behaves correctly with encodable and unencodable multipliers.
4236 __ Addvl(sp, sp, -5);
4237 __ Addvl(sp, sp, -37);
4238 __ Addvl(x11, sp, -2);
4239 __ Addvl(sp, x11, 2);
4240 __ Addvl(x12, sp, -42);
4241
4242 // Restore the value of sp.
4243 __ Addvl(sp, x11, 39);
4244 __ Addvl(sp, sp, 5);
4245
4246 // Adjust x11 and x12 to make the test sp-agnostic.
4247 __ Sub(x11, sp, x11);
4248 __ Sub(x12, sp, x12);
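  // At this point sp has been restored, x11 held sp - (44 * VL) and x12 held
  // sp - (84 * VL), so the subtractions leave 44 * VL and 84 * VL in x11 and
  // x12 respectively.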
4249
4250 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4251 __ Mov(x20, x30);
4252 __ Mov(x21, x30);
4253 __ Mov(x22, x30);
4254 __ Addvl(x20, x20, 4);
4255 __ Addvl(x21, x21, 42);
4256 __ Addvl(x22, x22, -0x0080000000000000);
4257
4258 END();
4259
4260 if (CAN_RUN()) {
4261 RUN();
4262
4263 uint64_t vl = config->sve_vl_in_bytes();
4264
4265 ASSERT_EQUAL_64(base + (vl * 0), x0);
4266 ASSERT_EQUAL_64(base + (vl * 1), x1);
4267 ASSERT_EQUAL_64(base + (vl * 31), x2);
4268 ASSERT_EQUAL_64(base + (vl * -1), x3);
4269 ASSERT_EQUAL_64(base + (vl * -32), x4);
4270
4271 ASSERT_EQUAL_64(base + (vl * 32), x5);
4272 ASSERT_EQUAL_64(base + (vl * -33), x6);
4273
4274 ASSERT_EQUAL_64(base + (vl * 0x007fffffffffffff), x7);
4275 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x8);
4276
4277 ASSERT_EQUAL_64(vl * 8, x9);
4278 ASSERT_EQUAL_64(vl * 42, x10);
4279
4280 ASSERT_EQUAL_64(vl * 44, x11);
4281 ASSERT_EQUAL_64(vl * 84, x12);
4282
4283 ASSERT_EQUAL_64(base + (vl * 4), x20);
4284 ASSERT_EQUAL_64(base + (vl * 42), x21);
4285 ASSERT_EQUAL_64(base + (vl * 0xff80000000000000), x22);
4286
4287 ASSERT_EQUAL_64(base, x30);
4288 }
4289}
4290
4291TEST_SVE(sve_addpl) {
4292 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4293 START();
4294
4295 uint64_t base = 0x1234567800000000;
4296 __ Mov(x30, base);
4297
4298 // Encodable multipliers.
4299 __ Addpl(x0, x30, 0);
4300 __ Addpl(x1, x30, 1);
4301 __ Addpl(x2, x30, 31);
4302 __ Addpl(x3, x30, -1);
4303 __ Addpl(x4, x30, -32);
4304
4305 // For unencodable multipliers, the MacroAssembler uses `Addvl` if it can, or
4306 // it falls back to `Rdvl` and `Add`.
4307 __ Addpl(x5, x30, 32);
4308 __ Addpl(x6, x30, -33);
4309
4310 // Test the limits of the multiplier supported by the `Rdvl` macro.
4311 __ Addpl(x7, x30, 0x007fffffffffffff);
4312 __ Addpl(x8, x30, -0x0080000000000000);
4313
4314 // Check that xzr behaves correctly.
4315 __ Addpl(x9, xzr, 8);
4316 __ Addpl(x10, xzr, 42);
4317
4318 // Check that sp behaves correctly with encodable and unencodable multipliers.
4319 __ Addpl(sp, sp, -5);
4320 __ Addpl(sp, sp, -37);
4321 __ Addpl(x11, sp, -2);
4322 __ Addpl(sp, x11, 2);
4323 __ Addpl(x12, sp, -42);
4324
4325 // Restore the value of sp.
4326 __ Addpl(sp, x11, 39);
4327 __ Addpl(sp, sp, 5);
4328
4329 // Adjust x11 and x12 to make the test sp-agnostic.
4330 __ Sub(x11, sp, x11);
4331 __ Sub(x12, sp, x12);
4332
4333 // Check cases where xd.Is(xn). This stresses scratch register allocation.
4334 __ Mov(x20, x30);
4335 __ Mov(x21, x30);
4336 __ Mov(x22, x30);
4337 __ Addpl(x20, x20, 4);
4338 __ Addpl(x21, x21, 42);
4339 __ Addpl(x22, x22, -0x0080000000000000);
4340
4341 END();
4342
4343 if (CAN_RUN()) {
4344 RUN();
4345
4346 uint64_t vl = config->sve_vl_in_bytes();
4347 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4348 uint64_t pl = vl / kZRegBitsPerPRegBit;
4349
4350 ASSERT_EQUAL_64(base + (pl * 0), x0);
4351 ASSERT_EQUAL_64(base + (pl * 1), x1);
4352 ASSERT_EQUAL_64(base + (pl * 31), x2);
4353 ASSERT_EQUAL_64(base + (pl * -1), x3);
4354 ASSERT_EQUAL_64(base + (pl * -32), x4);
4355
4356 ASSERT_EQUAL_64(base + (pl * 32), x5);
4357 ASSERT_EQUAL_64(base + (pl * -33), x6);
4358
4359 ASSERT_EQUAL_64(base + (pl * 0x007fffffffffffff), x7);
4360 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x8);
4361
4362 ASSERT_EQUAL_64(pl * 8, x9);
4363 ASSERT_EQUAL_64(pl * 42, x10);
4364
4365 ASSERT_EQUAL_64(pl * 44, x11);
4366 ASSERT_EQUAL_64(pl * 84, x12);
4367
4368 ASSERT_EQUAL_64(base + (pl * 4), x20);
4369 ASSERT_EQUAL_64(base + (pl * 42), x21);
4370 ASSERT_EQUAL_64(base + (pl * 0xff80000000000000), x22);
4371
4372 ASSERT_EQUAL_64(base, x30);
4373 }
4374}
4375
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004376TEST_SVE(sve_calculate_sve_address) {
4377 // Shadow the `MacroAssembler` type so that the test macros work without
4378 // modification.
4379 typedef CalculateSVEAddressMacroAssembler MacroAssembler;
4380
Jacob Bramley1314c462019-08-08 10:54:16 +01004381 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004382 START(); // NOLINT(clang-diagnostic-local-type-template-args)
4383
4384 uint64_t base = 0x1234567800000000;
4385 __ Mov(x28, base);
4386 __ Mov(x29, 48);
4387 __ Mov(x30, -48);
4388
4389 // Simple scalar (or equivalent) cases.
4390
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004391 __ CalculateSVEAddress(x0, SVEMemOperand(x28));
4392 __ CalculateSVEAddress(x1, SVEMemOperand(x28, 0));
4393 __ CalculateSVEAddress(x2, SVEMemOperand(x28, 0, SVE_MUL_VL));
4394 __ CalculateSVEAddress(x3, SVEMemOperand(x28, 0, SVE_MUL_VL), 3);
4395 __ CalculateSVEAddress(x4, SVEMemOperand(x28, xzr));
4396 __ CalculateSVEAddress(x5, SVEMemOperand(x28, xzr, LSL, 42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004397
4398 // scalar-plus-immediate
4399
4400 // Unscaled immediates, handled with `Add`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004401 __ CalculateSVEAddress(x6, SVEMemOperand(x28, 42));
4402 __ CalculateSVEAddress(x7, SVEMemOperand(x28, -42));
Jacob Bramley1314c462019-08-08 10:54:16 +01004403 // Scaled immediates, handled with `Addvl` or `Addpl`.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004404 __ CalculateSVEAddress(x8, SVEMemOperand(x28, 31, SVE_MUL_VL), 0);
4405 __ CalculateSVEAddress(x9, SVEMemOperand(x28, -32, SVE_MUL_VL), 0);
Jacob Bramley1314c462019-08-08 10:54:16 +01004406 // Out of `addvl` or `addpl` range.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004407 __ CalculateSVEAddress(x10, SVEMemOperand(x28, 42, SVE_MUL_VL), 0);
4408 __ CalculateSVEAddress(x11, SVEMemOperand(x28, -42, SVE_MUL_VL), 0);
4409 // As above, for VL-based accesses smaller than a Z register.
4410 VIXL_STATIC_ASSERT(kZRegBitsPerPRegBitLog2 == 3);
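  // The trailing argument acts as a log2 divisor applied to VL, so a value of
  // 3 scales the immediate by VL / 8 (i.e. PL); the expected values below
  // confirm this.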
4411 __ CalculateSVEAddress(x12, SVEMemOperand(x28, -32 * 8, SVE_MUL_VL), 3);
4412 __ CalculateSVEAddress(x13, SVEMemOperand(x28, -42 * 8, SVE_MUL_VL), 3);
4413 __ CalculateSVEAddress(x14, SVEMemOperand(x28, -32 * 4, SVE_MUL_VL), 2);
4414 __ CalculateSVEAddress(x15, SVEMemOperand(x28, -42 * 4, SVE_MUL_VL), 2);
4415 __ CalculateSVEAddress(x18, SVEMemOperand(x28, -32 * 2, SVE_MUL_VL), 1);
4416 __ CalculateSVEAddress(x19, SVEMemOperand(x28, -42 * 2, SVE_MUL_VL), 1);
Jacob Bramley1314c462019-08-08 10:54:16 +01004417
4418 // scalar-plus-scalar
4419
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004420 __ CalculateSVEAddress(x20, SVEMemOperand(x28, x29));
4421 __ CalculateSVEAddress(x21, SVEMemOperand(x28, x30));
4422 __ CalculateSVEAddress(x22, SVEMemOperand(x28, x29, LSL, 8));
4423 __ CalculateSVEAddress(x23, SVEMemOperand(x28, x30, LSL, 8));
Jacob Bramley1314c462019-08-08 10:54:16 +01004424
4425 // In-place updates, to stress scratch register allocation.
4426
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004427 __ Mov(x24, 0xabcd000000000000);
4428 __ Mov(x25, 0xabcd101100000000);
4429 __ Mov(x26, 0xabcd202200000000);
4430 __ Mov(x27, 0xabcd303300000000);
4431 __ Mov(x28, 0xabcd404400000000);
4432 __ Mov(x29, 0xabcd505500000000);
Jacob Bramley1314c462019-08-08 10:54:16 +01004433
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004434 __ CalculateSVEAddress(x24, SVEMemOperand(x24));
4435 __ CalculateSVEAddress(x25, SVEMemOperand(x25, 0x42));
4436 __ CalculateSVEAddress(x26, SVEMemOperand(x26, 3, SVE_MUL_VL), 0);
4437 __ CalculateSVEAddress(x27, SVEMemOperand(x27, 0x42, SVE_MUL_VL), 3);
4438 __ CalculateSVEAddress(x28, SVEMemOperand(x28, x30));
4439 __ CalculateSVEAddress(x29, SVEMemOperand(x29, x30, LSL, 4));
Jacob Bramley1314c462019-08-08 10:54:16 +01004440
4441 END();
4442
4443 if (CAN_RUN()) {
4444 RUN();
4445
4446 uint64_t vl = config->sve_vl_in_bytes();
4447 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
4448 uint64_t pl = vl / kZRegBitsPerPRegBit;
4449
4450 // Simple scalar (or equivalent) cases.
4451 ASSERT_EQUAL_64(base, x0);
4452 ASSERT_EQUAL_64(base, x1);
4453 ASSERT_EQUAL_64(base, x2);
4454 ASSERT_EQUAL_64(base, x3);
4455 ASSERT_EQUAL_64(base, x4);
4456 ASSERT_EQUAL_64(base, x5);
4457
4458 // scalar-plus-immediate
4459 ASSERT_EQUAL_64(base + 42, x6);
4460 ASSERT_EQUAL_64(base - 42, x7);
4461 ASSERT_EQUAL_64(base + (31 * vl), x8);
4462 ASSERT_EQUAL_64(base - (32 * vl), x9);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004463 ASSERT_EQUAL_64(base + (42 * vl), x10);
4464 ASSERT_EQUAL_64(base - (42 * vl), x11);
4465 ASSERT_EQUAL_64(base - (32 * vl), x12);
Jacob Bramley1314c462019-08-08 10:54:16 +01004466 ASSERT_EQUAL_64(base - (42 * vl), x13);
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004467 ASSERT_EQUAL_64(base - (32 * vl), x14);
4468 ASSERT_EQUAL_64(base - (42 * vl), x15);
4469 ASSERT_EQUAL_64(base - (32 * vl), x18);
4470 ASSERT_EQUAL_64(base - (42 * vl), x19);
Jacob Bramley1314c462019-08-08 10:54:16 +01004471
4472 // scalar-plus-scalar
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004473 ASSERT_EQUAL_64(base + 48, x20);
4474 ASSERT_EQUAL_64(base - 48, x21);
4475 ASSERT_EQUAL_64(base + (48 << 8), x22);
4476 ASSERT_EQUAL_64(base - (48 << 8), x23);
Jacob Bramley1314c462019-08-08 10:54:16 +01004477
4478 // In-place updates.
Jacob Bramley6ebbba62019-10-09 15:02:10 +01004479 ASSERT_EQUAL_64(0xabcd000000000000, x24);
4480 ASSERT_EQUAL_64(0xabcd101100000000 + 0x42, x25);
4481 ASSERT_EQUAL_64(0xabcd202200000000 + (3 * vl), x26);
4482 ASSERT_EQUAL_64(0xabcd303300000000 + (0x42 * pl), x27);
4483 ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
4484 ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
Jacob Bramley1314c462019-08-08 10:54:16 +01004485 }
4486}
4487
TatWai Chong4f28df72019-08-14 17:50:30 -07004488TEST_SVE(sve_permute_vector_unpredicated) {
4489 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
4490 START();
4491
Jacob Bramleye4983d42019-10-08 10:56:15 +01004492 // Initialise registers with known values first.
4493 __ Dup(z1.VnB(), 0x11);
4494 __ Dup(z2.VnB(), 0x22);
4495 __ Dup(z3.VnB(), 0x33);
4496 __ Dup(z4.VnB(), 0x44);
4497
TatWai Chong4f28df72019-08-14 17:50:30 -07004498 __ Mov(x0, 0x0123456789abcdef);
4499 __ Fmov(d0, RawbitsToDouble(0x7ffaaaaa22223456));
4500 __ Insr(z1.VnS(), w0);
4501 __ Insr(z2.VnD(), x0);
4502 __ Insr(z3.VnH(), h0);
4503 __ Insr(z4.VnD(), d0);
4504
4505 uint64_t inputs[] = {0xfedcba9876543210,
4506 0x0123456789abcdef,
4507 0x8f8e8d8c8b8a8988,
4508 0x8786858483828180};
4509
4510 // Initialize a distinguishable value throughout the register first.
4511 __ Dup(z9.VnB(), 0xff);
4512 InsrHelper(&masm, z9.VnD(), inputs);
4513
4514 __ Rev(z5.VnB(), z9.VnB());
4515 __ Rev(z6.VnH(), z9.VnH());
4516 __ Rev(z7.VnS(), z9.VnS());
4517 __ Rev(z8.VnD(), z9.VnD());
4518
4519 int index[7] = {22, 7, 7, 3, 1, 1, 63};
4520 // Broadcast an element from within the input array.
4521 __ Dup(z10.VnB(), z9.VnB(), index[0]);
4522 __ Dup(z11.VnH(), z9.VnH(), index[1]);
4523 __ Dup(z12.VnS(), z9.VnS(), index[2]);
4524 __ Dup(z13.VnD(), z9.VnD(), index[3]);
4525 __ Dup(z14.VnQ(), z9.VnQ(), index[4]);
4526 // Test dst == src
4527 __ Mov(z15, z9);
4528 __ Dup(z15.VnS(), z15.VnS(), index[5]);
4529 // Select an element beyond the input array.
4530 __ Dup(z16.VnB(), z9.VnB(), index[6]);
4531
4532 END();
4533
4534 if (CAN_RUN()) {
4535 RUN();
4536
4537 // Insr
Jacob Bramleye4983d42019-10-08 10:56:15 +01004538 uint64_t z1_expected[] = {0x1111111111111111, 0x1111111189abcdef};
4539 uint64_t z2_expected[] = {0x2222222222222222, 0x0123456789abcdef};
4540 uint64_t z3_expected[] = {0x3333333333333333, 0x3333333333333456};
4541 uint64_t z4_expected[] = {0x4444444444444444, 0x7ffaaaaa22223456};
TatWai Chong4f28df72019-08-14 17:50:30 -07004542 ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
4543 ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
4544 ASSERT_EQUAL_SVE(z3_expected, z3.VnD());
4545 ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
4546
4547 // Rev
4548 int lane_count = core.GetSVELaneCount(kBRegSize);
4549 for (int i = 0; i < lane_count; i++) {
4550 uint64_t expected =
4551 core.zreg_lane(z5.GetCode(), kBRegSize, lane_count - i - 1);
4552 uint64_t input = core.zreg_lane(z9.GetCode(), kBRegSize, i);
4553 ASSERT_EQUAL_64(expected, input);
4554 }
4555
4556 lane_count = core.GetSVELaneCount(kHRegSize);
4557 for (int i = 0; i < lane_count; i++) {
4558 uint64_t expected =
4559 core.zreg_lane(z6.GetCode(), kHRegSize, lane_count - i - 1);
4560 uint64_t input = core.zreg_lane(z9.GetCode(), kHRegSize, i);
4561 ASSERT_EQUAL_64(expected, input);
4562 }
4563
4564 lane_count = core.GetSVELaneCount(kSRegSize);
4565 for (int i = 0; i < lane_count; i++) {
4566 uint64_t expected =
4567 core.zreg_lane(z7.GetCode(), kSRegSize, lane_count - i - 1);
4568 uint64_t input = core.zreg_lane(z9.GetCode(), kSRegSize, i);
4569 ASSERT_EQUAL_64(expected, input);
4570 }
4571
4572 lane_count = core.GetSVELaneCount(kDRegSize);
4573 for (int i = 0; i < lane_count; i++) {
4574 uint64_t expected =
4575 core.zreg_lane(z8.GetCode(), kDRegSize, lane_count - i - 1);
4576 uint64_t input = core.zreg_lane(z9.GetCode(), kDRegSize, i);
4577 ASSERT_EQUAL_64(expected, input);
4578 }
4579
4580 // Dup
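    // An indexed `dup` whose index lies beyond the current vector length
    // zeroes every destination lane, so the expected values depend on vl.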
4581 unsigned vl = config->sve_vl_in_bits();
4582 lane_count = core.GetSVELaneCount(kBRegSize);
4583 uint64_t expected_z10 = (vl > (index[0] * kBRegSize)) ? 0x23 : 0;
4584 for (int i = 0; i < lane_count; i++) {
4585 ASSERT_EQUAL_SVE_LANE(expected_z10, z10.VnB(), i);
4586 }
4587
4588 lane_count = core.GetSVELaneCount(kHRegSize);
4589 uint64_t expected_z11 = (vl > (index[1] * kHRegSize)) ? 0x8f8e : 0;
4590 for (int i = 0; i < lane_count; i++) {
4591 ASSERT_EQUAL_SVE_LANE(expected_z11, z11.VnH(), i);
4592 }
4593
4594 lane_count = core.GetSVELaneCount(kSRegSize);
4595 uint64_t expected_z12 = (vl > (index[2] * kSRegSize)) ? 0xfedcba98 : 0;
4596 for (int i = 0; i < lane_count; i++) {
4597 ASSERT_EQUAL_SVE_LANE(expected_z12, z12.VnS(), i);
4598 }
4599
4600 lane_count = core.GetSVELaneCount(kDRegSize);
4601 uint64_t expected_z13 =
4602 (vl > (index[3] * kDRegSize)) ? 0xfedcba9876543210 : 0;
4603 for (int i = 0; i < lane_count; i++) {
4604 ASSERT_EQUAL_SVE_LANE(expected_z13, z13.VnD(), i);
4605 }
4606
4607 lane_count = core.GetSVELaneCount(kDRegSize);
4608 uint64_t expected_z14_lo = 0;
4609 uint64_t expected_z14_hi = 0;
4610 if (vl > (index[4] * kQRegSize)) {
4611 expected_z14_lo = 0x0123456789abcdef;
4612 expected_z14_hi = 0xfedcba9876543210;
4613 }
4614 for (int i = 0; i < lane_count; i += 2) {
4615 ASSERT_EQUAL_SVE_LANE(expected_z14_lo, z14.VnD(), i);
4616 ASSERT_EQUAL_SVE_LANE(expected_z14_hi, z14.VnD(), i + 1);
4617 }
4618
4619 lane_count = core.GetSVELaneCount(kSRegSize);
4620 uint64_t expected_z15 = (vl > (index[5] * kSRegSize)) ? 0x87868584 : 0;
4621 for (int i = 0; i < lane_count; i++) {
4622 ASSERT_EQUAL_SVE_LANE(expected_z15, z15.VnS(), i);
4623 }
4624
4625 lane_count = core.GetSVELaneCount(kBRegSize);
4626 uint64_t expected_z16 = (vl > (index[6] * kBRegSize)) ? 0xff : 0;
4627 for (int i = 0; i < lane_count; i++) {
4628 ASSERT_EQUAL_SVE_LANE(expected_z16, z16.VnB(), i);
4629 }
4630 }
4631}
4632
4633TEST_SVE(sve_permute_vector_unpredicated_unpack_vector_elements) {
4634 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4635 START();
4636
4637 uint64_t z9_inputs[] = {0xfedcba9876543210,
4638 0x0123456789abcdef,
4639 0x8f8e8d8c8b8a8988,
4640 0x8786858483828180};
4641 InsrHelper(&masm, z9.VnD(), z9_inputs);
4642
4643 __ Sunpkhi(z10.VnH(), z9.VnB());
4644 __ Sunpkhi(z11.VnS(), z9.VnH());
4645 __ Sunpkhi(z12.VnD(), z9.VnS());
4646
4647 __ Sunpklo(z13.VnH(), z9.VnB());
4648 __ Sunpklo(z14.VnS(), z9.VnH());
4649 __ Sunpklo(z15.VnD(), z9.VnS());
4650
4651 __ Uunpkhi(z16.VnH(), z9.VnB());
4652 __ Uunpkhi(z17.VnS(), z9.VnH());
4653 __ Uunpkhi(z18.VnD(), z9.VnS());
4654
4655 __ Uunpklo(z19.VnH(), z9.VnB());
4656 __ Uunpklo(z20.VnS(), z9.VnH());
4657 __ Uunpklo(z21.VnD(), z9.VnS());
4658
4659 END();
4660
4661 if (CAN_RUN()) {
4662 RUN();
4663
4664 // Sunpkhi
4665 int lane_count = core.GetSVELaneCount(kHRegSize);
4666 for (int i = lane_count - 1; i >= 0; i--) {
4667 uint16_t expected = core.zreg_lane<uint16_t>(z10.GetCode(), i);
4668 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4669 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4670 ASSERT_EQUAL_64(expected, input);
4671 }
4672
4673 lane_count = core.GetSVELaneCount(kSRegSize);
4674 for (int i = lane_count - 1; i >= 0; i--) {
4675 uint32_t expected = core.zreg_lane<uint32_t>(z11.GetCode(), i);
4676 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4677 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4678 ASSERT_EQUAL_64(expected, input);
4679 }
4680
4681 lane_count = core.GetSVELaneCount(kDRegSize);
4682 for (int i = lane_count - 1; i >= 0; i--) {
4683 uint64_t expected = core.zreg_lane<uint64_t>(z12.GetCode(), i);
4684 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4685 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4686 ASSERT_EQUAL_64(expected, input);
4687 }
4688
4689 // Sunpklo
4690 lane_count = core.GetSVELaneCount(kHRegSize);
4691 for (int i = lane_count - 1; i >= 0; i--) {
4692 uint16_t expected = core.zreg_lane<uint16_t>(z13.GetCode(), i);
4693 uint8_t b_lane = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4694 uint16_t input = SignExtend<int16_t>(b_lane, kBRegSize);
4695 ASSERT_EQUAL_64(expected, input);
4696 }
4697
4698 lane_count = core.GetSVELaneCount(kSRegSize);
4699 for (int i = lane_count - 1; i >= 0; i--) {
4700 uint32_t expected = core.zreg_lane<uint32_t>(z14.GetCode(), i);
4701 uint16_t h_lane = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4702 uint32_t input = SignExtend<int32_t>(h_lane, kHRegSize);
4703 ASSERT_EQUAL_64(expected, input);
4704 }
4705
4706 lane_count = core.GetSVELaneCount(kDRegSize);
4707 for (int i = lane_count - 1; i >= 0; i--) {
4708 uint64_t expected = core.zreg_lane<uint64_t>(z15.GetCode(), i);
4709 uint32_t s_lane = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4710 uint64_t input = SignExtend<int64_t>(s_lane, kSRegSize);
4711 ASSERT_EQUAL_64(expected, input);
4712 }
4713
4714 // Uunpkhi
4715 lane_count = core.GetSVELaneCount(kHRegSize);
4716 for (int i = lane_count - 1; i >= 0; i--) {
4717 uint16_t expected = core.zreg_lane<uint16_t>(z16.GetCode(), i);
4718 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i + lane_count);
4719 ASSERT_EQUAL_64(expected, input);
4720 }
4721
4722 lane_count = core.GetSVELaneCount(kSRegSize);
4723 for (int i = lane_count - 1; i >= 0; i--) {
4724 uint32_t expected = core.zreg_lane<uint32_t>(z17.GetCode(), i);
4725 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i + lane_count);
4726 ASSERT_EQUAL_64(expected, input);
4727 }
4728
4729 lane_count = core.GetSVELaneCount(kDRegSize);
4730 for (int i = lane_count - 1; i >= 0; i--) {
4731 uint64_t expected = core.zreg_lane<uint64_t>(z18.GetCode(), i);
4732 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i + lane_count);
4733 ASSERT_EQUAL_64(expected, input);
4734 }
4735
4736 // Uunpklo
4737 lane_count = core.GetSVELaneCount(kHRegSize);
4738 for (int i = lane_count - 1; i >= 0; i--) {
4739 uint16_t expected = core.zreg_lane<uint16_t>(z19.GetCode(), i);
4740 uint16_t input = core.zreg_lane<uint8_t>(z9.GetCode(), i);
4741 ASSERT_EQUAL_64(expected, input);
4742 }
4743
4744 lane_count = core.GetSVELaneCount(kSRegSize);
4745 for (int i = lane_count - 1; i >= 0; i--) {
4746 uint32_t expected = core.zreg_lane<uint32_t>(z20.GetCode(), i);
4747 uint32_t input = core.zreg_lane<uint16_t>(z9.GetCode(), i);
4748 ASSERT_EQUAL_64(expected, input);
4749 }
4750
4751 lane_count = core.GetSVELaneCount(kDRegSize);
4752 for (int i = lane_count - 1; i >= 0; i--) {
4753 uint64_t expected = core.zreg_lane<uint64_t>(z21.GetCode(), i);
4754 uint64_t input = core.zreg_lane<uint32_t>(z9.GetCode(), i);
4755 ASSERT_EQUAL_64(expected, input);
4756 }
4757 }
4758}
4759
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004760TEST_SVE(sve_cnot_not) {
4761 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4762 START();
4763
4764 uint64_t in[] = {0x0000000000000000, 0x00000000e1c30000, 0x123456789abcdef0};
4765
4766 // For simplicity, we re-use the same pg for various lane sizes.
4767 // For D lanes: 1, 1, 0
4768 // For S lanes: 1, 1, 1, 0, 0
4769 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4770 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4771 Initialise(&masm, p0.VnB(), pg_in);
4772 PRegisterM pg = p0.Merging();
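  // (For lanes wider than a byte, only the predicate bit corresponding to the
  // lowest-numbered byte of each lane is significant; that is how the H, S and
  // D patterns above are derived from pg_in.)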
4773
4774 // These are merging operations, so we have to initialise the result register.
4775 // We use a mixture of constructive and destructive operations.
4776
4777 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004778 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004779 __ Mov(z30, z31);
4780
4781 // For constructive operations, use a different initial result value.
4782 __ Index(z29.VnB(), 0, -1);
4783
4784 __ Mov(z0, z31);
4785 __ Cnot(z0.VnB(), pg, z0.VnB()); // destructive
4786 __ Mov(z1, z29);
4787 __ Cnot(z1.VnH(), pg, z31.VnH());
4788 __ Mov(z2, z31);
4789 __ Cnot(z2.VnS(), pg, z2.VnS()); // destructive
4790 __ Mov(z3, z29);
4791 __ Cnot(z3.VnD(), pg, z31.VnD());
4792
4793 __ Mov(z4, z29);
4794 __ Not(z4.VnB(), pg, z31.VnB());
4795 __ Mov(z5, z31);
4796 __ Not(z5.VnH(), pg, z5.VnH()); // destructive
4797 __ Mov(z6, z29);
4798 __ Not(z6.VnS(), pg, z31.VnS());
4799 __ Mov(z7, z31);
4800 __ Not(z7.VnD(), pg, z7.VnD()); // destructive
4801
4802 END();
4803
4804 if (CAN_RUN()) {
4805 RUN();
4806
4807 // Check that constructive operations preserve their inputs.
4808 ASSERT_EQUAL_SVE(z30, z31);
4809
4810 // clang-format off
4811
4812 // Cnot (B) destructive
4813 uint64_t expected_z0[] =
4814 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4815 {0x0000000001000101, 0x01000001e1000101, 0x12340078000000f0};
4816 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4817
4818 // Cnot (H)
4819 uint64_t expected_z1[] =
4820 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4821 {0xe9eaebecedee0001, 0xf1f2000100000001, 0xf9fafbfc0000ff00};
4822 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4823
4824 // Cnot (S) destructive
4825 uint64_t expected_z2[] =
4826 // pg: 0 1 1 1 0 0
4827 {0x0000000000000001, 0x0000000100000000, 0x123456789abcdef0};
4828 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4829
4830 // Cnot (D)
4831 uint64_t expected_z3[] =
4832 // pg: 1 1 0
4833 {0x0000000000000001, 0x0000000000000000, 0xf9fafbfcfdfeff00};
4834 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4835
4836 // Not (B)
4837 uint64_t expected_z4[] =
4838 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
4839 {0xe9eaebecffeeffff, 0xfff2f3fff53cffff, 0xf9faa9fc65432100};
4840 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4841
4842 // Not (H) destructive
4843 uint64_t expected_z5[] =
4844 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4845 {0x000000000000ffff, 0x0000ffff1e3cffff, 0x123456786543def0};
4846 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4847
4848 // Not (S)
4849 uint64_t expected_z6[] =
4850 // pg: 0 1 1 1 0 0
4851 {0xe9eaebecffffffff, 0xffffffff1e3cffff, 0xf9fafbfcfdfeff00};
4852 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
4853
4854 // Not (D) destructive
4855 uint64_t expected_z7[] =
4856 // pg: 1 1 0
4857 {0xffffffffffffffff, 0xffffffff1e3cffff, 0x123456789abcdef0};
4858 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
4859
4860 // clang-format on
4861 }
4862}
4863
4864TEST_SVE(sve_fabs_fneg) {
4865 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4866 START();
4867
4868 // Include FP64, FP32 and FP16 signalling NaNs. Most FP operations quieten
4869 // NaNs, but fabs and fneg do not.
4870 uint64_t in[] = {0xc04500004228d140, // Recognisable (+/-42) values.
4871 0xfff00000ff80fc01, // Signalling NaNs.
4872 0x123456789abcdef0};
4873
4874 // For simplicity, we re-use the same pg for various lane sizes.
4875 // For D lanes: 1, 1, 0
4876 // For S lanes: 1, 1, 1, 0, 0
4877 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4878 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4879 Initialise(&masm, p0.VnB(), pg_in);
4880 PRegisterM pg = p0.Merging();
4881
4882 // These are merging operations, so we have to initialise the result register.
4883 // We use a mixture of constructive and destructive operations.
4884
4885 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004886 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004887 __ Mov(z30, z31);
4888
4889 // For constructive operations, use a different initial result value.
4890 __ Index(z29.VnB(), 0, -1);
4891
4892 __ Mov(z0, z29);
4893 __ Fabs(z0.VnH(), pg, z31.VnH());
4894 __ Mov(z1, z31);
4895 __ Fabs(z1.VnS(), pg, z1.VnS()); // destructive
4896 __ Mov(z2, z29);
4897 __ Fabs(z2.VnD(), pg, z31.VnD());
4898
4899 __ Mov(z3, z31);
4900 __ Fneg(z3.VnH(), pg, z3.VnH()); // destructive
4901 __ Mov(z4, z29);
4902 __ Fneg(z4.VnS(), pg, z31.VnS());
4903 __ Mov(z5, z31);
4904 __ Fneg(z5.VnD(), pg, z5.VnD()); // destructive
4905
4906 END();
4907
4908 if (CAN_RUN()) {
4909 RUN();
4910
4911 // Check that constructive operations preserve their inputs.
4912 ASSERT_EQUAL_SVE(z30, z31);
4913
4914 // clang-format off
4915
4916 // Fabs (H)
4917 uint64_t expected_z0[] =
4918 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4919 {0xe9eaebecedee5140, 0xf1f200007f807c01, 0xf9fafbfc1abcff00};
4920 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
4921
4922 // Fabs (S) destructive
4923 uint64_t expected_z1[] =
4924 // pg: 0 1 1 1 0 0
4925 {0xc04500004228d140, 0x7ff000007f80fc01, 0x123456789abcdef0};
4926 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
4927
4928 // Fabs (D)
4929 uint64_t expected_z2[] =
4930 // pg: 1 1 0
4931 {0x404500004228d140, 0x7ff00000ff80fc01, 0xf9fafbfcfdfeff00};
4932 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
4933
4934 // Fneg (H) destructive
4935 uint64_t expected_z3[] =
4936 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
4937 {0xc045000042285140, 0xfff080007f807c01, 0x123456781abcdef0};
4938 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
4939
4940 // Fneg (S)
4941 uint64_t expected_z4[] =
4942 // pg: 0 1 1 1 0 0
4943 {0xe9eaebecc228d140, 0x7ff000007f80fc01, 0xf9fafbfcfdfeff00};
4944 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
4945
4946 // Fneg (D) destructive
4947 uint64_t expected_z5[] =
4948 // pg: 1 1 0
4949 {0x404500004228d140, 0x7ff00000ff80fc01, 0x123456789abcdef0};
4950 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
4951
4952 // clang-format on
4953 }
4954}
4955
4956TEST_SVE(sve_cls_clz_cnt) {
4957 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
4958 START();
4959
4960 uint64_t in[] = {0x0000000000000000, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
4961
4962 // For simplicity, we re-use the same pg for various lane sizes.
4963 // For D lanes: 1, 1, 0
4964 // For S lanes: 1, 1, 1, 0, 0
4965 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
4966 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
4967 Initialise(&masm, p0.VnB(), pg_in);
4968 PRegisterM pg = p0.Merging();
4969
4970 // These are merging operations, so we have to initialise the result register.
4971 // We use a mixture of constructive and destructive operations.
4972
4973 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01004974 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01004975 __ Mov(z30, z31);
4976
4977 // For constructive operations, use a different initial result value.
4978 __ Index(z29.VnB(), 0, -1);
4979
4980 __ Mov(z0, z29);
4981 __ Cls(z0.VnB(), pg, z31.VnB());
4982 __ Mov(z1, z31);
4983 __ Clz(z1.VnH(), pg, z1.VnH()); // destructive
4984 __ Mov(z2, z29);
4985 __ Cnt(z2.VnS(), pg, z31.VnS());
4986 __ Mov(z3, z31);
4987 __ Cnt(z3.VnD(), pg, z3.VnD()); // destructive
4988
4989 END();
4990
4991 if (CAN_RUN()) {
4992 RUN();
4993 // Check that non-destructive operations preserve their inputs.
4994 ASSERT_EQUAL_SVE(z30, z31);
4995
4996 // clang-format off
4997
4998 // cls (B)
4999 uint8_t expected_z0[] =
5000 // pg: 0 0 0 0 1 0 1 1
5001 // pg: 1 0 0 1 0 1 1 1
5002 // pg: 0 0 1 0 1 1 1 0
5003 {0xe9, 0xea, 0xeb, 0xec, 7, 0xee, 7, 7,
5004 6, 0xf2, 0xf3, 3, 0xf5, 1, 0, 3,
5005 0xf9, 0xfa, 0, 0xfc, 0, 0, 1, 0x00};
5006 ASSERT_EQUAL_SVE(expected_z0, z0.VnB());
5007
5008 // clz (H) destructive
5009 uint16_t expected_z1[] =
5010 // pg: 0 0 0 1
5011 // pg: 0 1 1 1
5012 // pg: 0 0 1 0
5013 {0x0000, 0x0000, 0x0000, 16,
5014 0xfefc, 0, 0, 0,
5015 0x1234, 0x5678, 0, 0xdef0};
5016 ASSERT_EQUAL_SVE(expected_z1, z1.VnH());
5017
5018 // cnt (S)
5019 uint32_t expected_z2[] =
5020 // pg: 0 1
5021 // pg: 1 1
5022 // pg: 0 0
5023 {0xe9eaebec, 0,
5024 22, 16,
5025 0xf9fafbfc, 0xfdfeff00};
5026 ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
5027
5028 // cnt (D) destructive
5029 uint64_t expected_z3[] =
5030 // pg: 1 1 0
5031 { 0, 38, 0x123456789abcdef0};
5032 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5033
5034 // clang-format on
5035 }
5036}
5037
5038TEST_SVE(sve_sxt) {
5039 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5040 START();
5041
5042 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5043
5044 // For simplicity, we re-use the same pg for various lane sizes.
5045 // For D lanes: 1, 1, 0
5046 // For S lanes: 1, 1, 1, 0, 0
5047 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5048 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5049 Initialise(&masm, p0.VnB(), pg_in);
5050 PRegisterM pg = p0.Merging();
5051
5052 // These are merging operations, so we have to initialise the result register.
5053 // We use a mixture of constructive and destructive operations.
5054
5055 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005056 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005057 __ Mov(z30, z31);
5058
5059 // For constructive operations, use a different initial result value.
5060 __ Index(z29.VnB(), 0, -1);
5061
5062 __ Mov(z0, z31);
5063 __ Sxtb(z0.VnH(), pg, z0.VnH()); // destructive
5064 __ Mov(z1, z29);
5065 __ Sxtb(z1.VnS(), pg, z31.VnS());
5066 __ Mov(z2, z31);
5067 __ Sxtb(z2.VnD(), pg, z2.VnD()); // destructive
5068 __ Mov(z3, z29);
5069 __ Sxth(z3.VnS(), pg, z31.VnS());
5070 __ Mov(z4, z31);
5071 __ Sxth(z4.VnD(), pg, z4.VnD()); // destructive
5072 __ Mov(z5, z29);
5073 __ Sxtw(z5.VnD(), pg, z31.VnD());
5074
5075 END();
5076
5077 if (CAN_RUN()) {
5078 RUN();
5079 // Check that constructive operations preserve their inputs.
5080 ASSERT_EQUAL_SVE(z30, z31);
5081
5082 // clang-format off
5083
5084 // Sxtb (H) destructive
5085 uint64_t expected_z0[] =
5086 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5087 {0x01f203f405f6fff8, 0xfefcfff0ffc3000f, 0x12345678ffbcdef0};
5088 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5089
5090 // Sxtb (S)
5091 uint64_t expected_z1[] =
5092 // pg: 0 1 1 1 0 0
5093 {0xe9eaebecfffffff8, 0xfffffff00000000f, 0xf9fafbfcfdfeff00};
5094 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5095
5096 // Sxtb (D) destructive
5097 uint64_t expected_z2[] =
5098 // pg: 1 1 0
5099 {0xfffffffffffffff8, 0x000000000000000f, 0x123456789abcdef0};
5100 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5101
5102 // Sxth (S)
5103 uint64_t expected_z3[] =
5104 // pg: 0 1 1 1 0 0
5105 {0xe9eaebec000007f8, 0xfffff8f0ffff870f, 0xf9fafbfcfdfeff00};
5106 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5107
5108 // Sxth (D) destructive
5109 uint64_t expected_z4[] =
5110 // pg: 1 1 0
5111 {0x00000000000007f8, 0xffffffffffff870f, 0x123456789abcdef0};
5112 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5113
5114 // Sxtw (D)
5115 uint64_t expected_z5[] =
5116 // pg: 1 1 0
5117 {0x0000000005f607f8, 0xffffffffe1c3870f, 0xf9fafbfcfdfeff00};
5118 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5119
5120 // clang-format on
5121 }
5122}
5123
5124TEST_SVE(sve_uxt) {
5125 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5126 START();
5127
5128 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5129
5130 // For simplicity, we re-use the same pg for various lane sizes.
5131 // For D lanes: 1, 1, 0
5132 // For S lanes: 1, 1, 1, 0, 0
5133 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5134 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5135 Initialise(&masm, p0.VnB(), pg_in);
5136 PRegisterM pg = p0.Merging();
5137
5138 // These are merging operations, so we have to initialise the result register.
5139 // We use a mixture of constructive and destructive operations.
5140
5141 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005142 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005143 __ Mov(z30, z31);
5144
5145 // For constructive operations, use a different initial result value.
5146 __ Index(z29.VnB(), 0, -1);
5147
5148 __ Mov(z0, z29);
5149 __ Uxtb(z0.VnH(), pg, z31.VnH());
5150 __ Mov(z1, z31);
5151 __ Uxtb(z1.VnS(), pg, z1.VnS()); // destructive
5152 __ Mov(z2, z29);
5153 __ Uxtb(z2.VnD(), pg, z31.VnD());
5154 __ Mov(z3, z31);
5155 __ Uxth(z3.VnS(), pg, z3.VnS()); // destructive
5156 __ Mov(z4, z29);
5157 __ Uxth(z4.VnD(), pg, z31.VnD());
5158 __ Mov(z5, z31);
5159 __ Uxtw(z5.VnD(), pg, z5.VnD()); // destructive
5160
5161 END();
5162
5163 if (CAN_RUN()) {
5164 RUN();
5165 // clang-format off
5166
5167 // Uxtb (H)
5168 uint64_t expected_z0[] =
5169 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5170 {0xe9eaebecedee00f8, 0xf1f200f000c3000f, 0xf9fafbfc00bcff00};
5171 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5172
5173 // Uxtb (S) destructive
5174 uint64_t expected_z1[] =
5175 // pg: 0 1 1 1 0 0
5176 {0x01f203f4000000f8, 0x000000f00000000f, 0x123456789abcdef0};
5177 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5178
5179 // Uxtb (D)
5180 uint64_t expected_z2[] =
5181 // pg: 1 1 0
5182 {0x00000000000000f8, 0x000000000000000f, 0xf9fafbfcfdfeff00};
5183 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5184
5185 // Uxth (S) destructive
5186 uint64_t expected_z3[] =
5187 // pg: 0 1 1 1 0 0
5188 {0x01f203f4000007f8, 0x0000f8f00000870f, 0x123456789abcdef0};
5189 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5190
5191 // Uxth (D)
5192 uint64_t expected_z4[] =
5193 // pg: 1 1 0
5194 {0x00000000000007f8, 0x000000000000870f, 0xf9fafbfcfdfeff00};
5195 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5196
5197 // Uxtw (D) destructive
5198 uint64_t expected_z5[] =
5199 // pg: 1 1 0
5200 {0x0000000005f607f8, 0x00000000e1c3870f, 0x123456789abcdef0};
5201 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5202
5203 // clang-format on
5204 }
5205}
5206
5207TEST_SVE(sve_abs_neg) {
5208 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5209 START();
5210
5211 uint64_t in[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
5212
5213 // For simplicity, we re-use the same pg for various lane sizes.
5214 // For D lanes: 1, 1, 0
5215 // For S lanes: 1, 1, 1, 0, 0
5216 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
5217 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
5218 Initialise(&masm, p0.VnB(), pg_in);
5219 PRegisterM pg = p0.Merging();
5220
5221 InsrHelper(&masm, z31.VnD(), in);
5222
5223 // These are merging operations, so we have to initialise the result register.
5224 // We use a mixture of constructive and destructive operations.
5225
5226 InsrHelper(&masm, z31.VnD(), in);
TatWai Chong6995bfd2019-09-26 10:48:05 +01005227 // Make a copy so we can check that constructive operations preserve zn.
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005228 __ Mov(z30, z31);
5229
5230 // For constructive operations, use a different initial result value.
5231 __ Index(z29.VnB(), 0, -1);
5232
5233 __ Mov(z0, z31);
5234 __ Abs(z0.VnD(), pg, z0.VnD()); // destructive
5235 __ Mov(z1, z29);
5236 __ Abs(z1.VnB(), pg, z31.VnB());
5237
5238 __ Mov(z2, z31);
5239 __ Neg(z2.VnH(), pg, z2.VnH()); // destructive
5240 __ Mov(z3, z29);
5241 __ Neg(z3.VnS(), pg, z31.VnS());
5242
Jacob Bramleyc0066272019-09-30 16:30:47 +01005243 // The unpredicated form of `Neg` is implemented using `subr`.
5244 __ Mov(z4, z31);
5245 __ Neg(z4.VnB(), z4.VnB()); // destructive
5246 __ Mov(z5, z29);
5247 __ Neg(z5.VnD(), z31.VnD());
5248
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005249 END();
5250
5251 if (CAN_RUN()) {
5252 RUN();
Jacob Bramleyc0066272019-09-30 16:30:47 +01005253
5254 ASSERT_EQUAL_SVE(z30, z31);
5255
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005256 // clang-format off
5257
5258 // Abs (D) destructive
5259 uint64_t expected_z0[] =
5260 // pg: 1 1 0
5261 {0x01f203f405f607f8, 0x0103070f1e3c78f1, 0x123456789abcdef0};
5262 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5263
5264 // Abs (B)
5265 uint64_t expected_z1[] =
5266 // pg: 0 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 0
5267 {0xe9eaebec05ee0708, 0x02f2f310f53d790f, 0xf9fa56fc66442200};
5268 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5269
5270 // Neg (H) destructive
5271 uint64_t expected_z2[] =
5272 // pg: 0 0 0 1 0 1 1 1 0 0 1 0
5273 {0x01f203f405f6f808, 0xfefc07101e3d78f1, 0x123456786544def0};
5274 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5275
5276 // Neg (S)
5277 uint64_t expected_z3[] =
5278 // pg: 0 1 1 1 0 0
5279 {0xe9eaebecfa09f808, 0x010307101e3c78f1, 0xf9fafbfcfdfeff00};
5280 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5281
Jacob Bramleyc0066272019-09-30 16:30:47 +01005282 // Neg (B) destructive, unpredicated
5283 uint64_t expected_z4[] =
5284 {0xff0efd0cfb0af908, 0x020408101f3d79f1, 0xeeccaa8866442210};
5285 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5286
5287 // Neg (D) unpredicated
5288 uint64_t expected_z5[] =
5289 {0xfe0dfc0bfa09f808, 0x0103070f1e3c78f1, 0xedcba98765432110};
5290 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5291
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01005292 // clang-format on
5293 }
5294}
5295
Jacob Bramley0093bb92019-10-04 15:54:10 +01005296TEST_SVE(sve_cpy) {
5297 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
5298 START();
5299
5300 // For simplicity, we re-use the same pg for various lane sizes.
5301 // For D lanes: 0, 1, 1
5302 // For S lanes: 0, 1, 1, 0, 1
5303 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5304 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5305
5306 PRegisterM pg = p7.Merging();
5307 Initialise(&masm, pg.VnB(), pg_in);
5308
5309 // These are merging operations, so we have to initialise the result registers
5310 // for each operation.
5311 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5312 __ Index(ZRegister(i, kBRegSize), 0, -1);
5313 }
5314
5315 // Recognisable values to copy.
5316 __ Mov(x0, 0xdeadbeefdeadbe42);
5317 __ Mov(x1, 0xdeadbeefdead8421);
5318 __ Mov(x2, 0xdeadbeef80042001);
5319 __ Mov(x3, 0x8000000420000001);
5320
5321 // Use NEON moves, to avoid testing SVE `cpy` against itself.
5322 __ Dup(v28.V2D(), x0);
5323 __ Dup(v29.V2D(), x1);
5324 __ Dup(v30.V2D(), x2);
5325 __ Dup(v31.V2D(), x3);
5326
5327 // Register forms (CPY_z_p_r)
5328 __ Cpy(z0.VnB(), pg, w0);
5329 __ Cpy(z1.VnH(), pg, x1); // X registers are accepted for small lanes.
5330 __ Cpy(z2.VnS(), pg, w2);
5331 __ Cpy(z3.VnD(), pg, x3);
5332
5333 // VRegister forms (CPY_z_p_v)
5334 __ Cpy(z4.VnB(), pg, b28);
5335 __ Cpy(z5.VnH(), pg, h29);
5336 __ Cpy(z6.VnS(), pg, s30);
5337 __ Cpy(z7.VnD(), pg, d31);
5338
5339 // Check that we can copy the stack pointer.
5340 __ Mov(x10, sp);
5341 __ Mov(sp, 0xabcabcabcabcabca); // Set sp to a known value.
5342 __ Cpy(z16.VnB(), pg, sp);
5343 __ Cpy(z17.VnH(), pg, wsp);
5344 __ Cpy(z18.VnS(), pg, wsp);
5345 __ Cpy(z19.VnD(), pg, sp);
5346 __ Mov(sp, x10); // Restore sp.
5347
5348 END();
5349
5350 if (CAN_RUN()) {
5351 RUN();
5352 // clang-format off
5353
5354 uint64_t expected_b[] =
5355 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5356 {0xe9eaebec424242f0, 0x42f2f34242f64242, 0xf942fbfcfdfeff42};
5357 ASSERT_EQUAL_SVE(expected_b, z0.VnD());
5358 ASSERT_EQUAL_SVE(expected_b, z4.VnD());
5359
5360 uint64_t expected_h[] =
5361 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5362 {0xe9eaebec8421eff0, 0xf1f28421f5f68421, 0x8421fbfcfdfe8421};
5363 ASSERT_EQUAL_SVE(expected_h, z1.VnD());
5364 ASSERT_EQUAL_SVE(expected_h, z5.VnD());
5365
5366 uint64_t expected_s[] =
5367 // pg: 0 0 1 1 0 1
5368 {0xe9eaebecedeeeff0, 0x8004200180042001, 0xf9fafbfc80042001};
5369 ASSERT_EQUAL_SVE(expected_s, z2.VnD());
5370 ASSERT_EQUAL_SVE(expected_s, z6.VnD());
5371
5372 uint64_t expected_d[] =
5373 // pg: 0 1 1
5374 {0xe9eaebecedeeeff0, 0x8000000420000001, 0x8000000420000001};
5375 ASSERT_EQUAL_SVE(expected_d, z3.VnD());
5376 ASSERT_EQUAL_SVE(expected_d, z7.VnD());
5377
5378
5379 uint64_t expected_b_sp[] =
5380 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5381 {0xe9eaebeccacacaf0, 0xcaf2f3cacaf6caca, 0xf9cafbfcfdfeffca};
5382 ASSERT_EQUAL_SVE(expected_b_sp, z16.VnD());
5383
5384 uint64_t expected_h_sp[] =
5385 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5386 {0xe9eaebecabcaeff0, 0xf1f2abcaf5f6abca, 0xabcafbfcfdfeabca};
5387 ASSERT_EQUAL_SVE(expected_h_sp, z17.VnD());
5388
5389 uint64_t expected_s_sp[] =
5390 // pg: 0 0 1 1 0 1
5391 {0xe9eaebecedeeeff0, 0xcabcabcacabcabca, 0xf9fafbfccabcabca};
5392 ASSERT_EQUAL_SVE(expected_s_sp, z18.VnD());
5393
5394 uint64_t expected_d_sp[] =
5395 // pg: 0 1 1
5396 {0xe9eaebecedeeeff0, 0xabcabcabcabcabca, 0xabcabcabcabcabca};
5397 ASSERT_EQUAL_SVE(expected_d_sp, z19.VnD());
5398
5399 // clang-format on
5400 }
5401}
5402
Jacob Bramley0f62eab2019-10-23 17:07:47 +01005403TEST_SVE(sve_cpy_imm) {
5404 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5405 START();
5406
5407 // For simplicity, we re-use the same pg for various lane sizes.
5408 // For D lanes: 0, 1, 1
5409 // For S lanes: 0, 1, 1, 0, 1
5410 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5411 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5412
5413 PRegister pg = p7;
5414 Initialise(&masm, pg.VnB(), pg_in);
5415
5416 // These are (mostly) merging operations, so we have to initialise the result
5417 // registers for each operation.
5418 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5419 __ Index(ZRegister(i, kBRegSize), 0, -1);
5420 }
5421
5422 // Encodable integer forms (CPY_z_p_i)
5423 __ Cpy(z0.VnB(), pg.Merging(), 0);
5424 __ Cpy(z1.VnB(), pg.Zeroing(), 42);
5425 __ Cpy(z2.VnB(), pg.Merging(), -42);
5426 __ Cpy(z3.VnB(), pg.Zeroing(), 0xff);
5427 __ Cpy(z4.VnH(), pg.Merging(), 127);
5428 __ Cpy(z5.VnS(), pg.Zeroing(), -128);
5429 __ Cpy(z6.VnD(), pg.Merging(), -1);
5430
5431 // Forms encodable using fcpy.
5432 __ Cpy(z7.VnH(), pg.Merging(), Float16ToRawbits(Float16(-31.0)));
5433 __ Cpy(z8.VnS(), pg.Zeroing(), FloatToRawbits(2.0f));
5434 __ Cpy(z9.VnD(), pg.Merging(), DoubleToRawbits(-4.0));
5435
5436 // Other forms use a scratch register.
5437 __ Cpy(z10.VnH(), pg.Merging(), 0xff);
5438 __ Cpy(z11.VnD(), pg.Zeroing(), 0x0123456789abcdef);
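  // A plausible expansion for the first of these, assuming a hypothetical
  // scratch register w16 (the exact sequence is up to the MacroAssembler):
  //   __ Mov(w16, 0xff);
  //   __ Cpy(z10.VnH(), pg.Merging(), w16);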
5439
5440 END();
5441
5442 if (CAN_RUN()) {
5443 RUN();
5444 // clang-format off
5445
5446 uint64_t expected_z0[] =
5447 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5448 {0xe9eaebec000000f0, 0x00f2f30000f60000, 0xf900fbfcfdfeff00};
5449 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
5450
5451 uint64_t expected_z1[] =
5452 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5453 {0x000000002a2a2a00, 0x2a00002a2a002a2a, 0x002a00000000002a};
5454 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5455
5456 uint64_t expected_z2[] =
5457 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5458 {0xe9eaebecd6d6d6f0, 0xd6f2f3d6d6f6d6d6, 0xf9d6fbfcfdfeffd6};
5459 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5460
5461 uint64_t expected_z3[] =
5462 // pg: 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1
5463 {0x00000000ffffff00, 0xff0000ffff00ffff, 0x00ff0000000000ff};
5464 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5465
5466 uint64_t expected_z4[] =
5467 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5468 {0xe9eaebec007feff0, 0xf1f2007ff5f6007f, 0x007ffbfcfdfe007f};
5469 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5470
5471 uint64_t expected_z5[] =
5472 // pg: 0 0 1 1 0 1
5473 {0x0000000000000000, 0xffffff80ffffff80, 0x00000000ffffff80};
5474 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5475
5476 uint64_t expected_z6[] =
5477 // pg: 0 1 1
5478 {0xe9eaebecedeeeff0, 0xffffffffffffffff, 0xffffffffffffffff};
5479 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5480
5481 uint64_t expected_z7[] =
5482 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5483 {0xe9eaebeccfc0eff0, 0xf1f2cfc0f5f6cfc0, 0xcfc0fbfcfdfecfc0};
5484 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5485
5486 uint64_t expected_z8[] =
5487 // pg: 0 0 1 1 0 1
5488 {0x0000000000000000, 0x4000000040000000, 0x0000000040000000};
5489 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
5490
5491 uint64_t expected_z9[] =
5492 // pg: 0 1 1
5493 {0xe9eaebecedeeeff0, 0xc010000000000000, 0xc010000000000000};
5494 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
5495
5496 uint64_t expected_z10[] =
5497 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5498 {0xe9eaebec00ffeff0, 0xf1f200fff5f600ff, 0x00fffbfcfdfe00ff};
5499 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
5500
5501 uint64_t expected_z11[] =
5502 // pg: 0 1 1
5503 {0x0000000000000000, 0x0123456789abcdef, 0x0123456789abcdef};
5504 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
5505
5506 // clang-format on
5507 }
5508}
5509
5510TEST_SVE(sve_fcpy_imm) {
5511 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5512 START();
5513
5514 // For simplicity, we re-use the same pg for various lane sizes.
5515 // For D lanes: 0, 1, 1
5516 // For S lanes: 0, 1, 1, 0, 1
5517 // For H lanes: 1, 0, 0, 1, 0, 1, 1, 0, 0, 1
5518 int pg_in[] = {1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1};
5519
5520 PRegister pg = p7;
5521 Initialise(&masm, pg.VnB(), pg_in);
5522
5523 // These are (mostly) merging operations, so we have to initialise the result
5524 // registers for each operation.
5525 for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
5526 __ Index(ZRegister(i, kBRegSize), 0, -1);
5527 }
5528
5529 // Encodable floating-point forms (FCPY_z_p_i)
5530 __ Fcpy(z1.VnH(), pg.Merging(), Float16(1.0));
5531 __ Fcpy(z2.VnH(), pg.Merging(), -2.0f);
5532 __ Fcpy(z3.VnH(), pg.Merging(), 3.0);
5533 __ Fcpy(z4.VnS(), pg.Merging(), Float16(-4.0));
5534 __ Fcpy(z5.VnS(), pg.Merging(), 5.0f);
5535 __ Fcpy(z6.VnS(), pg.Merging(), 6.0);
5536 __ Fcpy(z7.VnD(), pg.Merging(), Float16(7.0));
5537 __ Fcpy(z8.VnD(), pg.Merging(), 8.0f);
5538 __ Fcpy(z9.VnD(), pg.Merging(), -9.0);
5539
5540 // Unencodable immediates.
5541 __ Fcpy(z10.VnS(), pg.Merging(), 0.0);
5542 __ Fcpy(z11.VnH(), pg.Merging(), Float16(42.0));
5543 __ Fcpy(z12.VnD(), pg.Merging(), RawbitsToDouble(0x7ff0000012340000)); // NaN
5544 __ Fcpy(z13.VnH(), pg.Merging(), kFP64NegativeInfinity);
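  // (These are presumably materialised through an integer path rather than the
  // FCPY encoding; the checks below only examine the resulting bit patterns.)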
5545
5546 END();
5547
5548 if (CAN_RUN()) {
5549 RUN();
5550 // clang-format off
5551
5552 // 1.0 as FP16: 0x3c00
5553 uint64_t expected_z1[] =
5554 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5555 {0xe9eaebec3c00eff0, 0xf1f23c00f5f63c00, 0x3c00fbfcfdfe3c00};
5556 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
5557
5558 // -2.0 as FP16: 0xc000
5559 uint64_t expected_z2[] =
5560 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5561 {0xe9eaebecc000eff0, 0xf1f2c000f5f6c000, 0xc000fbfcfdfec000};
5562 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
5563
5564 // 3.0 as FP16: 0x4200
5565 uint64_t expected_z3[] =
5566 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5567 {0xe9eaebec4200eff0, 0xf1f24200f5f64200, 0x4200fbfcfdfe4200};
5568 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
5569
5570 // -4.0 as FP32: 0xc0800000
5571 uint64_t expected_z4[] =
5572 // pg: 0 0 1 1 0 1
5573 {0xe9eaebecedeeeff0, 0xc0800000c0800000, 0xf9fafbfcc0800000};
5574 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
5575
5576 // 5.0 as FP32: 0x40a00000
5577 uint64_t expected_z5[] =
5578 // pg: 0 0 1 1 0 1
5579 {0xe9eaebecedeeeff0, 0x40a0000040a00000, 0xf9fafbfc40a00000};
5580 ASSERT_EQUAL_SVE(expected_z5, z5.VnD());
5581
5582 // 6.0 as FP32: 0x40c00000
5583 uint64_t expected_z6[] =
5584 // pg: 0 0 1 1 0 1
5585 {0xe9eaebecedeeeff0, 0x40c0000040c00000, 0xf9fafbfc40c00000};
5586 ASSERT_EQUAL_SVE(expected_z6, z6.VnD());
5587
5588 // 7.0 as FP64: 0x401c000000000000
5589 uint64_t expected_z7[] =
5590 // pg: 0 1 1
5591 {0xe9eaebecedeeeff0, 0x401c000000000000, 0x401c000000000000};
5592 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
5593
5594 // 8.0 as FP64: 0x4020000000000000
5595 uint64_t expected_z8[] =
5596 // pg: 0 1 1
5597 {0xe9eaebecedeeeff0, 0x4020000000000000, 0x4020000000000000};
5598 ASSERT_EQUAL_SVE(expected_z8, z8.VnD());
5599
5600 // -9.0 as FP64: 0xc022000000000000
5601 uint64_t expected_z9[] =
5602 // pg: 0 1 1
5603 {0xe9eaebecedeeeff0, 0xc022000000000000, 0xc022000000000000};
5604 ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
5605
5606 // 0.0 as FP32: 0x00000000
5607 uint64_t expected_z10[] =
5608 // pg: 0 0 1 1 0 1
5609 {0xe9eaebecedeeeff0, 0x0000000000000000, 0xf9fafbfc00000000};
5610 ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
5611
5612 // 42.0 as FP16: 0x5140
5613 uint64_t expected_z11[] =
5614 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5615 {0xe9eaebec5140eff0, 0xf1f25140f5f65140, 0x5140fbfcfdfe5140};
5616 ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
5617
5618 // Signalling NaN (with payload): 0x7ff0000012340000
5619 uint64_t expected_z12[] =
5620 // pg: 0 1 1
5621 {0xe9eaebecedeeeff0, 0x7ff0000012340000, 0x7ff0000012340000};
5622 ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
5623
5624 // -infinity as FP16: 0xfc00
5625 uint64_t expected_z13[] =
5626 // pg: 0 0 1 0 0 1 0 1 1 0 0 1
5627 {0xe9eaebecfc00eff0, 0xf1f2fc00f5f6fc00, 0xfc00fbfcfdfefc00};
5628 ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
5629
5630 // clang-format on
5631 }
5632}
5633
TatWai Chong4f28df72019-08-14 17:50:30 -07005634TEST_SVE(sve_permute_vector_unpredicated_table_lookup) {
5635 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5636 START();
5637
5638 uint64_t table_inputs[] = {0xffeeddccbbaa9988, 0x7766554433221100};
5639
5640 int index_b[] = {255, 255, 11, 10, 15, 14, 13, 12, 1, 0, 4, 3, 7, 6, 5, 4};
5641
5642 int index_h[] = {5, 6, 7, 8, 2, 3, 6, 4};
5643
5644 int index_s[] = {1, 3, 2, 31, -1};
5645
5646 int index_d[] = {31, 1};
5647
5648 // Initialize the register with a value that doesn't exist in the table.
5649 __ Dup(z9.VnB(), 0x1f);
5650 InsrHelper(&masm, z9.VnD(), table_inputs);
5651
5652 ZRegister ind_b = z0.WithLaneSize(kBRegSize);
5653 ZRegister ind_h = z1.WithLaneSize(kHRegSize);
5654 ZRegister ind_s = z2.WithLaneSize(kSRegSize);
5655 ZRegister ind_d = z3.WithLaneSize(kDRegSize);
5656
5657 InsrHelper(&masm, ind_b, index_b);
5658 InsrHelper(&masm, ind_h, index_h);
5659 InsrHelper(&masm, ind_s, index_s);
5660 InsrHelper(&masm, ind_d, index_d);
5661
5662 __ Tbl(z26.VnB(), z9.VnB(), ind_b);
5663
5664 __ Tbl(z27.VnH(), z9.VnH(), ind_h);
5665
5666 __ Tbl(z28.VnS(), z9.VnS(), ind_s);
5667
5668 __ Tbl(z29.VnD(), z9.VnD(), ind_d);
5669
5670 END();
5671
5672 if (CAN_RUN()) {
5673 RUN();
5674
5675 // clang-format off
5676 unsigned z26_expected[] = {0x1f, 0x1f, 0xbb, 0xaa, 0xff, 0xee, 0xdd, 0xcc,
5677 0x11, 0x00, 0x44, 0x33, 0x77, 0x66, 0x55, 0x44};
5678
5679 unsigned z27_expected[] = {0xbbaa, 0xddcc, 0xffee, 0x1f1f,
5680 0x5544, 0x7766, 0xddcc, 0x9988};
5681
5682 unsigned z28_expected[] =
5683 {0x77665544, 0xffeeddcc, 0xbbaa9988, 0x1f1f1f1f, 0x1f1f1f1f};
5684
5685 uint64_t z29_expected[] = {0x1f1f1f1f1f1f1f1f, 0xffeeddccbbaa9988};
5686 // clang-format on
5687
5688 unsigned vl = config->sve_vl_in_bits();
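    // `tbl` writes zero to any lane whose index selects beyond the current
    // vector length, so clamp the expected values accordingly.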
5689 for (size_t i = 0; i < ArrayLength(index_b); i++) {
5690 int lane = static_cast<int>(ArrayLength(index_b) - i - 1);
5691 if (!core.HasSVELane(z26.VnB(), lane)) break;
5692 uint64_t expected = (vl > (index_b[i] * kBRegSize)) ? z26_expected[i] : 0;
5693 ASSERT_EQUAL_SVE_LANE(expected, z26.VnB(), lane);
5694 }
5695
5696 for (size_t i = 0; i < ArrayLength(index_h); i++) {
5697 int lane = static_cast<int>(ArrayLength(index_h) - i - 1);
5698 if (!core.HasSVELane(z27.VnH(), lane)) break;
5699 uint64_t expected = (vl > (index_h[i] * kHRegSize)) ? z27_expected[i] : 0;
5700 ASSERT_EQUAL_SVE_LANE(expected, z27.VnH(), lane);
5701 }
5702
5703 for (size_t i = 0; i < ArrayLength(index_s); i++) {
5704 int lane = static_cast<int>(ArrayLength(index_s) - i - 1);
5705 if (!core.HasSVELane(z28.VnS(), lane)) break;
5706 uint64_t expected = (vl > (index_s[i] * kSRegSize)) ? z28_expected[i] : 0;
5707 ASSERT_EQUAL_SVE_LANE(expected, z28.VnS(), lane);
5708 }
5709
5710 for (size_t i = 0; i < ArrayLength(index_d); i++) {
5711 int lane = static_cast<int>(ArrayLength(index_d) - i - 1);
5712 if (!core.HasSVELane(z29.VnD(), lane)) break;
5713 uint64_t expected = (vl > (index_d[i] * kDRegSize)) ? z29_expected[i] : 0;
5714 ASSERT_EQUAL_SVE_LANE(expected, z29.VnD(), lane);
5715 }
5716 }
5717}
5718
Jacob Bramley199339d2019-08-05 18:49:13 +01005719TEST_SVE(ldr_str_z_bi) {
5720 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5721 START();
5722
5723 int vl = config->sve_vl_in_bytes();
5724
5725 // The immediate can address [-256, 255] times the VL, so allocate enough
5726 // space to exceed that in both directions.
5727 int data_size = vl * 1024;
5728
5729 uint8_t* data = new uint8_t[data_size];
5730 memset(data, 0, data_size);
5731
5732 // Set the base half-way through the buffer so we can use negative indices.
5733 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
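  // For Z-register Ldr/Str, an SVE_MUL_VL immediate addresses `x0 + imm * VL`
  // bytes; z4 and z5 below exercise the extremes of the encodable range.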
5734
5735 __ Index(z1.VnB(), 1, 3);
5736 __ Index(z2.VnB(), 2, 5);
5737 __ Index(z3.VnB(), 3, 7);
5738 __ Index(z4.VnB(), 4, 11);
5739 __ Index(z5.VnB(), 5, 13);
5740 __ Index(z6.VnB(), 6, 2);
5741 __ Index(z7.VnB(), 7, 3);
5742 __ Index(z8.VnB(), 8, 5);
5743 __ Index(z9.VnB(), 9, 7);
5744
5745 // Encodable cases.
5746 __ Str(z1, SVEMemOperand(x0));
5747 __ Str(z2, SVEMemOperand(x0, 2, SVE_MUL_VL));
5748 __ Str(z3, SVEMemOperand(x0, -3, SVE_MUL_VL));
5749 __ Str(z4, SVEMemOperand(x0, 255, SVE_MUL_VL));
5750 __ Str(z5, SVEMemOperand(x0, -256, SVE_MUL_VL));
5751
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005752 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01005753 __ Str(z6, SVEMemOperand(x0, 6 * vl));
5754 __ Str(z7, SVEMemOperand(x0, -7 * vl));
5755 __ Str(z8, SVEMemOperand(x0, 314, SVE_MUL_VL));
5756 __ Str(z9, SVEMemOperand(x0, -314, SVE_MUL_VL));
5757
5758 // Corresponding loads.
5759 __ Ldr(z11, SVEMemOperand(x0, xzr)); // Test xzr operand.
5760 __ Ldr(z12, SVEMemOperand(x0, 2, SVE_MUL_VL));
5761 __ Ldr(z13, SVEMemOperand(x0, -3, SVE_MUL_VL));
5762 __ Ldr(z14, SVEMemOperand(x0, 255, SVE_MUL_VL));
5763 __ Ldr(z15, SVEMemOperand(x0, -256, SVE_MUL_VL));
5764
5765 __ Ldr(z16, SVEMemOperand(x0, 6 * vl));
5766 __ Ldr(z17, SVEMemOperand(x0, -7 * vl));
5767 __ Ldr(z18, SVEMemOperand(x0, 314, SVE_MUL_VL));
5768 __ Ldr(z19, SVEMemOperand(x0, -314, SVE_MUL_VL));
5769
5770 END();
5771
5772 if (CAN_RUN()) {
5773 RUN();
5774
5775 uint8_t* expected = new uint8_t[data_size];
5776 memset(expected, 0, data_size);
5777 uint8_t* middle = &expected[data_size / 2];
5778
5779 for (int i = 0; i < vl; i++) {
5780 middle[i] = (1 + (3 * i)) & 0xff; // z1
5781 middle[(2 * vl) + i] = (2 + (5 * i)) & 0xff; // z2
5782 middle[(-3 * vl) + i] = (3 + (7 * i)) & 0xff; // z3
5783 middle[(255 * vl) + i] = (4 + (11 * i)) & 0xff; // z4
5784 middle[(-256 * vl) + i] = (5 + (13 * i)) & 0xff; // z5
5785 middle[(6 * vl) + i] = (6 + (2 * i)) & 0xff; // z6
5786 middle[(-7 * vl) + i] = (7 + (3 * i)) & 0xff; // z7
5787 middle[(314 * vl) + i] = (8 + (5 * i)) & 0xff; // z8
5788 middle[(-314 * vl) + i] = (9 + (7 * i)) & 0xff; // z9
5789 }
5790
Jacob Bramley33c99f92019-10-08 15:24:12 +01005791 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005792
5793 ASSERT_EQUAL_SVE(z1, z11);
5794 ASSERT_EQUAL_SVE(z2, z12);
5795 ASSERT_EQUAL_SVE(z3, z13);
5796 ASSERT_EQUAL_SVE(z4, z14);
5797 ASSERT_EQUAL_SVE(z5, z15);
5798 ASSERT_EQUAL_SVE(z6, z16);
5799 ASSERT_EQUAL_SVE(z7, z17);
5800 ASSERT_EQUAL_SVE(z8, z18);
5801 ASSERT_EQUAL_SVE(z9, z19);
5802
5803 delete[] expected;
5804 }
5805 delete[] data;
5806}
5807
5808TEST_SVE(ldr_str_p_bi) {
5809 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5810 START();
5811
5812 int vl = config->sve_vl_in_bytes();
5813 VIXL_ASSERT((vl % kZRegBitsPerPRegBit) == 0);
5814 int pl = vl / kZRegBitsPerPRegBit;
5815
5816 // The immediate can address [-256, 255] times the PL, so allocate enough
5817 // space to exceed that in both directions.
5818 int data_size = pl * 1024;
5819
5820 uint8_t* data = new uint8_t[data_size];
5821 memset(data, 0, data_size);
5822
5823 // Set the base half-way through the buffer so we can use negative indices.
5824 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
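  // For P-register Ldr/Str, the immediate is scaled by PL (VL / 8) rather than
  // VL, so the addressable window is [-256, 255] * PL around x0.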
5825
5826 uint64_t pattern[4] = {0x1010101011101111,
5827 0x0010111011000101,
5828 0x1001101110010110,
5829 0x1010110101100011};
5830 for (int i = 8; i <= 15; i++) {
5831 // Initialise p8-p15 with a conveniently-recognisable, non-zero pattern.
5832 Initialise(&masm,
5833 PRegister(i),
5834 pattern[3] * i,
5835 pattern[2] * i,
5836 pattern[1] * i,
5837 pattern[0] * i);
5838 }
5839
5840 // Encodable cases.
5841 __ Str(p8, SVEMemOperand(x0));
5842 __ Str(p9, SVEMemOperand(x0, 2, SVE_MUL_VL));
5843 __ Str(p10, SVEMemOperand(x0, -3, SVE_MUL_VL));
5844 __ Str(p11, SVEMemOperand(x0, 255, SVE_MUL_VL));
5845
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005846 // Cases that fall back on `CalculateSVEAddress`.
Jacob Bramley199339d2019-08-05 18:49:13 +01005847 __ Str(p12, SVEMemOperand(x0, 6 * pl));
5848 __ Str(p13, SVEMemOperand(x0, -7 * pl));
5849 __ Str(p14, SVEMemOperand(x0, 314, SVE_MUL_VL));
5850 __ Str(p15, SVEMemOperand(x0, -314, SVE_MUL_VL));
5851
5852 // Corresponding loads.
5853 __ Ldr(p0, SVEMemOperand(x0));
5854 __ Ldr(p1, SVEMemOperand(x0, 2, SVE_MUL_VL));
5855 __ Ldr(p2, SVEMemOperand(x0, -3, SVE_MUL_VL));
5856 __ Ldr(p3, SVEMemOperand(x0, 255, SVE_MUL_VL));
5857
5858 __ Ldr(p4, SVEMemOperand(x0, 6 * pl));
5859 __ Ldr(p5, SVEMemOperand(x0, -7 * pl));
5860 __ Ldr(p6, SVEMemOperand(x0, 314, SVE_MUL_VL));
5861 __ Ldr(p7, SVEMemOperand(x0, -314, SVE_MUL_VL));
5862
5863 END();
5864
5865 if (CAN_RUN()) {
5866 RUN();
5867
5868 uint8_t* expected = new uint8_t[data_size];
5869 memset(expected, 0, data_size);
5870 uint8_t* middle = &expected[data_size / 2];
5871
5872 for (int i = 0; i < pl; i++) {
5873 int bit_index = (i % sizeof(pattern[0])) * kBitsPerByte;
5874 size_t index = i / sizeof(pattern[0]);
5875 VIXL_ASSERT(index < ArrayLength(pattern));
5876 uint64_t byte = (pattern[index] >> bit_index) & 0xff;
5877 // Each byte of `pattern` can be multiplied by 15 without carry.
5878 VIXL_ASSERT((byte * 15) <= 0xff);
5879
5880 middle[i] = byte * 8; // p8
5881 middle[(2 * pl) + i] = byte * 9; // p9
5882 middle[(-3 * pl) + i] = byte * 10; // p10
5883 middle[(255 * pl) + i] = byte * 11; // p11
5884 middle[(6 * pl) + i] = byte * 12; // p12
5885 middle[(-7 * pl) + i] = byte * 13; // p13
5886 middle[(314 * pl) + i] = byte * 14; // p14
5887 middle[(-314 * pl) + i] = byte * 15; // p15
5888 }
5889
Jacob Bramley33c99f92019-10-08 15:24:12 +01005890 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramley199339d2019-08-05 18:49:13 +01005891
5892 ASSERT_EQUAL_SVE(p0, p8);
5893 ASSERT_EQUAL_SVE(p1, p9);
5894 ASSERT_EQUAL_SVE(p2, p10);
5895 ASSERT_EQUAL_SVE(p3, p11);
5896 ASSERT_EQUAL_SVE(p4, p12);
5897 ASSERT_EQUAL_SVE(p5, p13);
5898 ASSERT_EQUAL_SVE(p6, p14);
5899 ASSERT_EQUAL_SVE(p7, p15);
5900
5901 delete[] expected;
5902 }
5903 delete[] data;
5904}
5905
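// Write `data` to `base + offset + (index * sizeof(data))`. Using memcpy keeps
// the store independent of the host's alignment and strict-aliasing rules.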
Jacob Bramleye668b202019-08-14 17:57:34 +01005906template <typename T>
5907static void MemoryWrite(uint8_t* base, int64_t offset, int64_t index, T data) {
5908 memcpy(base + offset + (index * sizeof(data)), &data, sizeof(data));
5909}
5910
5911TEST_SVE(sve_ld1_st1_contiguous) {
5912 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
5913 START();
5914
5915 int vl = config->sve_vl_in_bytes();
5916
5917 // The immediate can address [-8, 7] times the VL, so allocate enough space to
5918 // exceed that in both directions.
5919 int data_size = vl * 128;
5920
5921 uint8_t* data = new uint8_t[data_size];
5922 memset(data, 0, data_size);
5923
5924 // Set the base half-way through the buffer so we can use negative indices.
5925 __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
5926
Jacob Bramleye668b202019-08-14 17:57:34 +01005927 // Encodable scalar-plus-immediate cases.
5928 __ Index(z1.VnB(), 1, -3);
5929 __ Ptrue(p1.VnB());
5930 __ St1b(z1.VnB(), p1, SVEMemOperand(x0));
5931
5932 __ Index(z2.VnH(), -2, 5);
5933 __ Ptrue(p2.VnH(), SVE_MUL3);
5934 __ St1b(z2.VnH(), p2, SVEMemOperand(x0, 7, SVE_MUL_VL));
5935
5936 __ Index(z3.VnS(), 3, -7);
5937 __ Ptrue(p3.VnS(), SVE_POW2);
5938 __ St1h(z3.VnS(), p3, SVEMemOperand(x0, -8, SVE_MUL_VL));
5939
5940 // Encodable scalar-plus-scalar cases.
5941 __ Index(z4.VnD(), -4, 11);
5942 __ Ptrue(p4.VnD(), SVE_VL3);
5943 __ Addvl(x1, x0, 8); // Try not to overlap with VL-dependent cases.
5944 __ Mov(x2, 17);
5945 __ St1b(z4.VnD(), p4, SVEMemOperand(x1, x2));
5946
5947 __ Index(z5.VnD(), 6, -2);
5948 __ Ptrue(p5.VnD(), SVE_VL16);
TatWai Chong6205eb42019-09-24 10:07:20 +01005949 __ Addvl(x3, x0, 10); // Try not to overlap with VL-dependent cases.
5950 __ Mov(x4, 6);
5951 __ St1d(z5.VnD(), p5, SVEMemOperand(x3, x4, LSL, 3));
Jacob Bramleye668b202019-08-14 17:57:34 +01005952
Jacob Bramley6ebbba62019-10-09 15:02:10 +01005953 // Unencodable cases fall back on `CalculateSVEAddress`.
Jacob Bramleye668b202019-08-14 17:57:34 +01005954 __ Index(z6.VnS(), -7, 3);
5955 // Setting SVE_ALL on B lanes checks that the Simulator ignores irrelevant
5956 // predicate bits when handling larger lanes.
5957 __ Ptrue(p6.VnB(), SVE_ALL);
5958 __ St1w(z6.VnS(), p6, SVEMemOperand(x0, 42, SVE_MUL_VL));
5959
TatWai Chong6205eb42019-09-24 10:07:20 +01005960 __ Index(z7.VnD(), 32, -11);
5961 __ Ptrue(p7.VnD(), SVE_MUL4);
5962 __ St1w(z7.VnD(), p7, SVEMemOperand(x0, 22, SVE_MUL_VL));
Jacob Bramleye668b202019-08-14 17:57:34 +01005963
TatWai Chong6205eb42019-09-24 10:07:20 +01005964 // Corresponding loads.
5965 __ Ld1b(z8.VnB(), p1.Zeroing(), SVEMemOperand(x0));
5966 __ Ld1b(z9.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5967 __ Ld1h(z10.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5968 __ Ld1b(z11.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5969 __ Ld1d(z12.VnD(), p5.Zeroing(), SVEMemOperand(x3, x4, LSL, 3));
5970 __ Ld1w(z13.VnS(), p6.Zeroing(), SVEMemOperand(x0, 42, SVE_MUL_VL));
5971
5972 __ Ld1sb(z14.VnH(), p2.Zeroing(), SVEMemOperand(x0, 7, SVE_MUL_VL));
5973 __ Ld1sh(z15.VnS(), p3.Zeroing(), SVEMemOperand(x0, -8, SVE_MUL_VL));
5974 __ Ld1sb(z16.VnD(), p4.Zeroing(), SVEMemOperand(x1, x2));
5975 __ Ld1sw(z17.VnD(), p7.Zeroing(), SVEMemOperand(x0, 22, SVE_MUL_VL));
5976
5977 // We can test ld1 by comparing the value loaded with the value stored. In
5978 // most cases, there are two complications:
5979 // - Loads have zeroing predication, so we have to clear the inactive
5980 // elements on our reference.
5981 // - We have to replicate any sign- or zero-extension.
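  // For example, the reference for Ld1sb into H lanes is built by
  // sign-extending the stored values with Sxtb under the same predicate.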
5982
5983 // Ld1b(z8.VnB(), ...)
5984 __ Dup(z18.VnB(), 0);
5985 __ Mov(z18.VnB(), p1.Merging(), z1.VnB());
5986
5987 // Ld1b(z9.VnH(), ...)
5988 __ Dup(z19.VnH(), 0);
5989 __ Uxtb(z19.VnH(), p2.Merging(), z2.VnH());
5990
5991 // Ld1h(z10.VnS(), ...)
5992 __ Dup(z20.VnS(), 0);
5993 __ Uxth(z20.VnS(), p3.Merging(), z3.VnS());
5994
5995 // Ld1b(z11.VnD(), ...)
5996 __ Dup(z21.VnD(), 0);
5997 __ Uxtb(z21.VnD(), p4.Merging(), z4.VnD());
5998
5999 // Ld1d(z12.VnD(), ...)
6000 __ Dup(z22.VnD(), 0);
6001 __ Mov(z22.VnD(), p5.Merging(), z5.VnD());
6002
6003 // Ld1w(z13.VnS(), ...)
6004 __ Dup(z23.VnS(), 0);
6005 __ Mov(z23.VnS(), p6.Merging(), z6.VnS());
6006
6007 // Ld1sb(z14.VnH(), ...)
6008 __ Dup(z24.VnH(), 0);
6009 __ Sxtb(z24.VnH(), p2.Merging(), z2.VnH());
6010
6011 // Ld1sh(z15.VnS(), ...)
6012 __ Dup(z25.VnS(), 0);
6013 __ Sxth(z25.VnS(), p3.Merging(), z3.VnS());
6014
6015 // Ld1sb(z16.VnD(), ...)
6016 __ Dup(z26.VnD(), 0);
6017 __ Sxtb(z26.VnD(), p4.Merging(), z4.VnD());
6018
6019 // Ld1sw(z17.VnD(), ...)
6020 __ Dup(z27.VnD(), 0);
6021 __ Sxtw(z27.VnD(), p7.Merging(), z7.VnD());
Jacob Bramleye668b202019-08-14 17:57:34 +01006022
6023 END();
6024
6025 if (CAN_RUN()) {
6026 RUN();
6027
6028 uint8_t* expected = new uint8_t[data_size];
6029 memset(expected, 0, data_size);
6030 uint8_t* middle = &expected[data_size / 2];
6031
6032 int vl_b = vl / kBRegSizeInBytes;
6033 int vl_h = vl / kHRegSizeInBytes;
6034 int vl_s = vl / kSRegSizeInBytes;
6035 int vl_d = vl / kDRegSizeInBytes;
6036
6037 // Encodable cases.
6038
6039 // st1b { z1.b }, SVE_ALL
6040 for (int i = 0; i < vl_b; i++) {
6041 MemoryWrite(middle, 0, i, static_cast<uint8_t>(1 - (3 * i)));
6042 }
6043
6044 // st1b { z2.h }, SVE_MUL3
6045 int vl_h_mul3 = vl_h - (vl_h % 3);
6046 for (int i = 0; i < vl_h_mul3; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006047 int64_t offset = 7 * static_cast<int>(vl / (kHRegSize / kBRegSize));
6048 MemoryWrite(middle, offset, i, static_cast<uint8_t>(-2 + (5 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006049 }
6050
6051 // st1h { z3.s }, SVE_POW2
6052 int vl_s_pow2 = 1 << HighestSetBitPosition(vl_s);
6053 for (int i = 0; i < vl_s_pow2; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006054 int64_t offset = -8 * static_cast<int>(vl / (kSRegSize / kHRegSize));
6055 MemoryWrite(middle, offset, i, static_cast<uint16_t>(3 - (7 * i)));
Jacob Bramleye668b202019-08-14 17:57:34 +01006056 }
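    // For example, with a 384-bit VL: vl_h is 24, so SVE_MUL3 keeps all 24 H
    // lanes, while vl_s is 12, so SVE_POW2 keeps only the lowest 8 S lanes.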
6057
6058 // st1b { z4.d }, SVE_VL3
6059 if (vl_d >= 3) {
6060 for (int i = 0; i < 3; i++) {
6061 MemoryWrite(middle,
6062 (8 * vl) + 17,
6063 i,
6064 static_cast<uint8_t>(-4 + (11 * i)));
6065 }
6066 }
6067
6068 // st1d { z5.d }, SVE_VL16
6069 if (vl_d >= 16) {
6070 for (int i = 0; i < 16; i++) {
6071 MemoryWrite(middle,
6072 (10 * vl) + (6 * kDRegSizeInBytes),
6073 i,
6074 static_cast<uint64_t>(6 - (2 * i)));
6075 }
6076 }
6077
6078 // Unencodable cases.
6079
6080 // st1w { z6.s }, SVE_ALL
6081 for (int i = 0; i < vl_s; i++) {
6082 MemoryWrite(middle, 42 * vl, i, static_cast<uint32_t>(-7 + (3 * i)));
6083 }
6084
TatWai Chong6205eb42019-09-24 10:07:20 +01006085 // st1w { z7.d }, SVE_MUL4
6086 int vl_d_mul4 = vl_d - (vl_d % 4);
6087 for (int i = 0; i < vl_d_mul4; i++) {
Jacob Bramley6ebbba62019-10-09 15:02:10 +01006088 int64_t offset = 22 * static_cast<int>(vl / (kDRegSize / kWRegSize));
6089 MemoryWrite(middle, offset, i, static_cast<uint32_t>(32 + (-11 * i)));
TatWai Chong6205eb42019-09-24 10:07:20 +01006090 }
6091
Jacob Bramley33c99f92019-10-08 15:24:12 +01006092 ASSERT_EQUAL_MEMORY(expected, data, data_size, middle - expected);
Jacob Bramleye668b202019-08-14 17:57:34 +01006093
TatWai Chong6205eb42019-09-24 10:07:20 +01006094 // Check that we loaded back the expected values.
6095
6096 ASSERT_EQUAL_SVE(z18, z8);
6097 ASSERT_EQUAL_SVE(z19, z9);
6098 ASSERT_EQUAL_SVE(z20, z10);
6099 ASSERT_EQUAL_SVE(z21, z11);
6100 ASSERT_EQUAL_SVE(z22, z12);
6101 ASSERT_EQUAL_SVE(z23, z13);
6102 ASSERT_EQUAL_SVE(z24, z14);
6103 ASSERT_EQUAL_SVE(z25, z15);
6104 ASSERT_EQUAL_SVE(z26, z16);
6105 ASSERT_EQUAL_SVE(z27, z17);
6106
Jacob Bramleye668b202019-08-14 17:57:34 +01006107 delete[] expected;
6108 }
6109 delete[] data;
6110}
6111
TatWai Chong6995bfd2019-09-26 10:48:05 +01006112typedef void (MacroAssembler::*IntWideImmFn)(const ZRegister& zd,
6113 const ZRegister& zn,
6114 const IntegerOperand imm);
6115
6116template <typename F, typename Td, typename Tn>
6117static void IntWideImmHelper(Test* config,
6118 F macro,
6119 unsigned lane_size_in_bits,
6120 const Tn& zn_inputs,
6121 IntegerOperand imm,
6122 const Td& zd_expected) {
6123 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6124 START();
6125
6126 ZRegister zd1 = z0.WithLaneSize(lane_size_in_bits);
6127 InsrHelper(&masm, zd1, zn_inputs);
6128
6129 // Also test with a different zn, to test the movprfx case.
6130 ZRegister zn = z1.WithLaneSize(lane_size_in_bits);
6131 InsrHelper(&masm, zn, zn_inputs);
6132 ZRegister zd2 = z2.WithLaneSize(lane_size_in_bits);
6133 ZRegister zn_copy = z3.WithSameLaneSizeAs(zn);
6134
6135 // Make a copy so we can check that constructive operations preserve zn.
6136 __ Mov(zn_copy, zn);
6137
6138 {
6139 UseScratchRegisterScope temps(&masm);
6140 // The MacroAssembler needs a P scratch register for some of these macros,
6141 // and it doesn't have one by default.
6142 temps.Include(p3);
6143
6144 (masm.*macro)(zd1, zd1, imm);
6145 (masm.*macro)(zd2, zn, imm);
6146 }
6147
6148 END();
6149
6150 if (CAN_RUN()) {
6151 RUN();
6152
6153 ASSERT_EQUAL_SVE(zd_expected, zd1);
6154
6155 // Check that the result of `instr` with movprfx is the same as that of
6156 // the plain destructive form above.
6157 ASSERT_EQUAL_SVE(zd_expected, zd2);
6158
6159 ASSERT_EQUAL_SVE(zn_copy, zn);
6160 }
6161}
6162
6163TEST_SVE(sve_int_wide_imm_unpredicated_smax) {
6164 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
6165 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
6166 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
6167 int64_t in_d[] = {1, 10, 10000, 1000000};
6168
6169 IntWideImmFn fn = &MacroAssembler::Smax;
6170
6171 int exp_b_1[] = {0, -1, 127, -1, 126, 1, -1, 55};
6172 int exp_h_1[] = {127, 127, 127, 127, INT16_MAX, 127, 127, 5555};
6173 int exp_s_1[] = {0, -128, 127, -128, INT32_MAX, 1, -1, 555555};
6174 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
6175
6176 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
6177 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
6178 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
6179 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6180
6181 int exp_h_2[] = {0, -128, 127, -255, INT16_MAX, 1, -1, 5555};
6182 int exp_s_2[] = {2048, 2048, 2048, 2048, INT32_MAX, 2048, 2048, 555555};
6183 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
6184
6185 // The immediate is in the range [-128, 127], but the macro is able to
6186 // synthesise unencodable immediates.
6187 // B-sized lanes cannot take an immediate out of the range [-128, 127].
6188 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6189 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6190 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
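  // As an illustration only (the exact expansion is chosen by the
  // MacroAssembler, and the scratch registers here are placeholders), an
  // unencodable immediate such as 2048 could be synthesised along these lines:
  //   ptrue p_scratch.s
  //   dup   z_scratch.s, #2048
  //   smax  zd.s, p_scratch/m, zd.s, z_scratch.s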
6191}
6192
6193TEST_SVE(sve_int_wide_imm_unpredicated_smin) {
6194 int in_b[] = {0, -128, 127, -127, 126, 1, -1, 55};
6195 int in_h[] = {0, -128, 127, INT16_MIN, INT16_MAX, 1, -1, 5555};
6196 int in_s[] = {0, -128, 127, INT32_MIN, INT32_MAX, 1, -1, 555555};
6197 int64_t in_d[] = {1, 10, 10000, 1000000};
6198
6199 IntWideImmFn fn = &MacroAssembler::Smin;
6200
6201 int exp_b_1[] = {-1, -128, -1, -127, -1, -1, -1, -1};
6202 int exp_h_1[] = {0, -128, 127, INT16_MIN, 127, 1, -1, 127};
6203 int exp_s_1[] = {-128, -128, -128, INT32_MIN, -128, -128, -128, -128};
6204 int64_t exp_d_1[] = {1, 10, 99, 99};
6205
6206 IntWideImmHelper(config, fn, kBRegSize, in_b, -1, exp_b_1);
6207 IntWideImmHelper(config, fn, kHRegSize, in_h, 127, exp_h_1);
6208 IntWideImmHelper(config, fn, kSRegSize, in_s, -128, exp_s_1);
6209 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6210
6211 int exp_h_2[] = {-255, -255, -255, INT16_MIN, -255, -255, -255, -255};
6212 int exp_s_2[] = {0, -128, 127, INT32_MIN, 2048, 1, -1, 2048};
6213 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
6214
6215 // The immediate is in the range [-128, 127], but the macro is able to
6216 // synthesise unencodable immediates.
6217 // B-sized lanes cannot take an immediate out of the range [-128, 127].
6218 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6219 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6220 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6221}
6222
6223TEST_SVE(sve_int_wide_imm_unpredicated_umax) {
6224 int in_b[] = {0, 255, 127, 0x80, 1, 55};
6225 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
6226 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
6227 int64_t in_d[] = {1, 10, 10000, 1000000};
6228
6229 IntWideImmFn fn = &MacroAssembler::Umax;
6230
6231 int exp_b_1[] = {17, 255, 127, 0x80, 17, 55};
6232 int exp_h_1[] = {127, 255, 127, INT16_MAX, 127, 5555};
6233 int exp_s_1[] = {255, 255, 255, INT32_MAX, 255, 555555};
6234 int64_t exp_d_1[] = {99, 99, 10000, 1000000};
6235
6236 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
6237 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
6238 IntWideImmHelper(config, fn, kSRegSize, in_s, 0xff, exp_s_1);
6239 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6240
6241 int exp_h_2[] = {511, 511, 511, INT16_MAX, 511, 5555};
6242 int exp_s_2[] = {2048, 2048, 2048, INT32_MAX, 2048, 555555};
6243 int64_t exp_d_2[] = {INT16_MAX, INT16_MAX, INT16_MAX, 1000000};
6244
6245 // The immediate is in the range [0, 255], but the macro is able to
6246 // synthesise unencodable immediates.
6247 // B-sized lanes cannot take an immediate out of the range [0, 255].
6248 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
6249 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6250 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6251}
6252
6253TEST_SVE(sve_int_wide_imm_unpredicated_umin) {
6254 int in_b[] = {0, 255, 127, 0x80, 1, 55};
6255 int in_h[] = {0, 255, 127, INT16_MAX, 1, 5555};
6256 int in_s[] = {0, 0xff, 0x7f, INT32_MAX, 1, 555555};
6257 int64_t in_d[] = {1, 10, 10000, 1000000};
6258
6259 IntWideImmFn fn = &MacroAssembler::Umin;
6260
6261 int exp_b_1[] = {0, 17, 17, 17, 1, 17};
6262 int exp_h_1[] = {0, 127, 127, 127, 1, 127};
6263 int exp_s_1[] = {0, 255, 127, 255, 1, 255};
6264 int64_t exp_d_1[] = {1, 10, 99, 99};
6265
6266 IntWideImmHelper(config, fn, kBRegSize, in_b, 17, exp_b_1);
6267 IntWideImmHelper(config, fn, kHRegSize, in_h, 0x7f, exp_h_1);
6268 IntWideImmHelper(config, fn, kSRegSize, in_s, 255, exp_s_1);
6269 IntWideImmHelper(config, fn, kDRegSize, in_d, 99, exp_d_1);
6270
6271 int exp_h_2[] = {0, 255, 127, 511, 1, 511};
6272 int exp_s_2[] = {0, 255, 127, 2048, 1, 2048};
6273 int64_t exp_d_2[] = {1, 10, 10000, INT16_MAX};
6274
6275 // The immediate is in the range [0, 255], but the macro is able to
6276 // synthesise unencodable immediates.
6277 // B-sized lanes cannot take an immediate out of the range [0, 255].
6278 IntWideImmHelper(config, fn, kHRegSize, in_h, 511, exp_h_2);
6279 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6280 IntWideImmHelper(config, fn, kDRegSize, in_d, INT16_MAX, exp_d_2);
6281}
6282
6283TEST_SVE(sve_int_wide_imm_unpredicated_mul) {
6284 int in_b[] = {11, -1, 7, -3};
6285 int in_h[] = {111, -1, 17, -123};
6286 int in_s[] = {11111, -1, 117, -12345};
6287 int64_t in_d[] = {0x7fffffff, 0x80000000};
6288
6289 IntWideImmFn fn = &MacroAssembler::Mul;
6290
6291 int exp_b_1[] = {66, -6, 42, -18};
6292 int exp_h_1[] = {-14208, 128, -2176, 15744};
6293 int exp_s_1[] = {11111 * 127, -127, 117 * 127, -12345 * 127};
6294 int64_t exp_d_1[] = {0xfffffffe, 0x100000000};
6295
6296 IntWideImmHelper(config, fn, kBRegSize, in_b, 6, exp_b_1);
6297 IntWideImmHelper(config, fn, kHRegSize, in_h, -128, exp_h_1);
6298 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6299 IntWideImmHelper(config, fn, kDRegSize, in_d, 2, exp_d_1);
6300
6301 int exp_h_2[] = {-28305, 255, -4335, 31365};
6302 int exp_s_2[] = {22755328, -2048, 239616, -25282560};
6303 int64_t exp_d_2[] = {0x00000063ffffff38, 0x0000006400000000};
6304
6305 // The immediate is in the range [-128, 127], but the macro is able to
6306 // synthesise unencodable immediates.
6307 // B-sized lanes cannot take an immediate out of the range [-128, 127].
6308 IntWideImmHelper(config, fn, kHRegSize, in_h, -255, exp_h_2);
6309 IntWideImmHelper(config, fn, kSRegSize, in_s, 2048, exp_s_2);
6310 IntWideImmHelper(config, fn, kDRegSize, in_d, 200, exp_d_2);
6311
6312 // Integer overflow on multiplication.
6313 unsigned exp_b_3[] = {0x75, 0x81, 0x79, 0x83};
6314
6315 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x7f, exp_b_3);
6316}
6317
6318TEST_SVE(sve_int_wide_imm_unpredicated_add) {
6319 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6320 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6321 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6322 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6323
6324 IntWideImmFn fn = &MacroAssembler::Add;
6325
6326 unsigned exp_b_1[] = {0x02, 0x00, 0x91, 0x80};
6327 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6328 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6329 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6330
6331 // Encodable with `add` (shift 0).
6332 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6333 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6334 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6335 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6336
6337 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
6338 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6339 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6340
6341 // Encodable with `add` (shift 8).
6342 // B-sized lanes cannot take a shift of 8.
6343 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6344 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6345 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6346
6347 unsigned exp_s_3[] = {0x80808181, 0x807e7f7f, 0xab29aaaa, 0xf07ff0f0};
6348
6349 // The macro is able to synthesise unencodable immediates.
6350 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01006351
6352 unsigned exp_b_4[] = {0x61, 0x5f, 0xf0, 0xdf};
6353 unsigned exp_h_4[] = {0x6181, 0x5f7f, 0xf010, 0x8aaa};
6354 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
6355 uint64_t exp_d_4[] = {0x8000000180018180, 0x7fffffff7fff7f7e};
6356
6357 // Negative immediates use `sub`.
6358 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
6359 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
6360 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
6361 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006362}
6363
6364TEST_SVE(sve_int_wide_imm_unpredicated_sqadd) {
6365 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6366 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6367 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6368 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6369
6370 IntWideImmFn fn = &MacroAssembler::Sqadd;
6371
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006372 unsigned exp_b_1[] = {0x02, 0x7f, 0x7f, 0x7f};
TatWai Chong6995bfd2019-09-26 10:48:05 +01006373 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6374 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6375 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6376
6377 // Encodable with `sqadd` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006378 // Note that encodable immediates are unsigned, even for signed saturation.
6379 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006380 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6381 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006382 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006383
6384 unsigned exp_h_2[] = {0x9181, 0x7fff, 0x2010, 0xbaaa};
6385 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6386 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6387
6388 // Encodable with `sqadd` (shift 8).
6389 // B-sized lanes cannot take a shift of 8.
6390 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6391 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6392 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006393}
6394
6395TEST_SVE(sve_int_wide_imm_unpredicated_uqadd) {
6396 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6397 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6398 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6399 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6400
6401 IntWideImmFn fn = &MacroAssembler::Uqadd;
6402
6403 unsigned exp_b_1[] = {0xff, 0xff, 0x91, 0xff};
6404 unsigned exp_h_1[] = {0x8191, 0x7f8f, 0x1020, 0xaaba};
6405 unsigned exp_s_1[] = {0x80018200, 0x7fff7ffe, 0xaaaaab29, 0xf000f16f};
6406 uint64_t exp_d_1[] = {0x8000000180018280, 0x7fffffff7fff807e};
6407
6408 // Encodable with `uqadd` (shift 0).
6409 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6410 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6411 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6412 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6413
6414 unsigned exp_h_2[] = {0x9181, 0x8f7f, 0x2010, 0xbaaa};
6415 unsigned exp_s_2[] = {0x80020081, 0x7ffffe7f, 0xaaab29aa, 0xf0016ff0};
6416 uint64_t exp_d_2[] = {0x8000000180028081, 0x7fffffff80007e7f};
6417
6418 // Encodable with `uqadd` (shift 8).
6419 // B-sized lanes cannot take a shift of 8.
6420 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6421 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6422 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006423}
6424
6425TEST_SVE(sve_int_wide_imm_unpredicated_sub) {
6426 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6427 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6428 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6429 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6430
6431 IntWideImmFn fn = &MacroAssembler::Sub;
6432
6433 unsigned exp_b_1[] = {0x00, 0xfe, 0x8f, 0x7e};
6434 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6435 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6436 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6437
6438 // Encodable with `sub` (shift 0).
6439 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6440 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6441 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6442 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6443
6444 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6445 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6446 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6447
6448 // Encodable with `sub` (shift 8).
6449 // B-sized lanes cannot take a shift of 8.
6450 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6451 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6452 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
6453
6454 unsigned exp_s_3[] = {0x7f828181, 0x7f807f7f, 0xaa2baaaa, 0xef81f0f0};
6455
6456 // The macro is able to synthesise unencodable immediates.
6457 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 16, exp_s_3);
Jacob Bramleyd9f929c2019-10-02 11:42:56 +01006458
6459 unsigned exp_b_4[] = {0xa1, 0x9f, 0x30, 0x1f};
6460 unsigned exp_h_4[] = {0xa181, 0x9f7f, 0x3010, 0xcaaa};
6461 unsigned exp_s_4[] = {0x00018181, 0xffff7f7f, 0x2aaaaaaa, 0x7000f0f0};
6462 uint64_t exp_d_4[] = {0x8000000180018182, 0x7fffffff7fff7f80};
6463
6464 // Negative immediates use `add`.
6465 IntWideImmHelper(config, fn, kBRegSize, in_b, -0x20, exp_b_4);
6466 IntWideImmHelper(config, fn, kHRegSize, in_h, -0x2000, exp_h_4);
6467 IntWideImmHelper(config, fn, kSRegSize, in_s, INT32_MIN, exp_s_4);
6468 IntWideImmHelper(config, fn, kDRegSize, in_d, -1, exp_d_4);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006469}
6470
6471TEST_SVE(sve_int_wide_imm_unpredicated_sqsub) {
6472 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6473 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6474 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6475 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6476
6477 IntWideImmFn fn = &MacroAssembler::Sqsub;
6478
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006479 unsigned exp_b_1[] = {0x80, 0xfe, 0x8f, 0x80};
TatWai Chong6995bfd2019-09-26 10:48:05 +01006480 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6481 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6482 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6483
6484 // Encodable with `sqsub` (shift 0).
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006485 // Note that encodable immediates are unsigned, even for signed saturation.
6486 IntWideImmHelper(config, fn, kBRegSize, in_b, 129, exp_b_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006487 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6488 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
Jacob Bramleyb28f6172019-10-02 12:12:35 +01006489 IntWideImmHelper(config, fn, kDRegSize, in_d, 255, exp_d_1);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006490
6491 unsigned exp_h_2[] = {0x8000, 0x6f7f, 0x0010, 0x9aaa};
6492 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6493 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6494
6495 // Encodable with `sqsub` (shift 8).
6496 // B-sized lanes cannot take a shift of 8.
6497 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6498 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6499 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006500}
6501
6502TEST_SVE(sve_int_wide_imm_unpredicated_uqsub) {
6503 unsigned in_b[] = {0x81, 0x7f, 0x10, 0xff};
6504 unsigned in_h[] = {0x8181, 0x7f7f, 0x1010, 0xaaaa};
6505 unsigned in_s[] = {0x80018181, 0x7fff7f7f, 0xaaaaaaaa, 0xf000f0f0};
6506 uint64_t in_d[] = {0x8000000180018181, 0x7fffffff7fff7f7f};
6507
6508 IntWideImmFn fn = &MacroAssembler::Uqsub;
6509
6510 unsigned exp_b_1[] = {0x00, 0x00, 0x00, 0x7e};
6511 unsigned exp_h_1[] = {0x8171, 0x7f6f, 0x1000, 0xaa9a};
6512 unsigned exp_s_1[] = {0x80018102, 0x7fff7f00, 0xaaaaaa2b, 0xf000f071};
6513 uint64_t exp_d_1[] = {0x8000000180018082, 0x7fffffff7fff7e80};
6514
6515 // Encodable with `uqsub` (shift 0).
6516 IntWideImmHelper(config, fn, kBRegSize, in_b, 0x81, exp_b_1);
6517 IntWideImmHelper(config, fn, kHRegSize, in_h, 16, exp_h_1);
6518 IntWideImmHelper(config, fn, kSRegSize, in_s, 127, exp_s_1);
6519 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff, exp_d_1);
6520
6521 unsigned exp_h_2[] = {0x7181, 0x6f7f, 0x0010, 0x9aaa};
6522 unsigned exp_s_2[] = {0x80010281, 0x7fff007f, 0xaaaa2baa, 0xf00071f0};
6523 uint64_t exp_d_2[] = {0x8000000180008281, 0x7fffffff7ffe807f};
6524
6525 // Encodable with `uqsub` (shift 8).
6526 // B-sized lanes cannot take a shift of 8.
6527 IntWideImmHelper(config, fn, kHRegSize, in_h, 16 << 8, exp_h_2);
6528 IntWideImmHelper(config, fn, kSRegSize, in_s, 127 << 8, exp_s_2);
6529 IntWideImmHelper(config, fn, kDRegSize, in_d, 0xff << 8, exp_d_2);
TatWai Chong6995bfd2019-09-26 10:48:05 +01006530}
6531
6532TEST_SVE(sve_int_wide_imm_unpredicated_subr) {
6533 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6534 START();
6535
6536 // Encodable with `subr` (shift 0).
6537 __ Index(z0.VnD(), 1, 1);
6538 __ Sub(z0.VnD(), 100, z0.VnD());
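  // For example, the first indexed lane holds 1, so it becomes 100 - 1 = 99.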
6539 __ Index(z1.VnS(), 0x7f, 1);
6540 __ Sub(z1.VnS(), 0xf7, z1.VnS());
6541 __ Index(z2.VnH(), 0xaaaa, 0x2222);
6542 __ Sub(z2.VnH(), 0x80, z2.VnH());
6543 __ Index(z3.VnB(), 133, 1);
6544 __ Sub(z3.VnB(), 255, z3.VnB());
6545
6546 // Encodable with `subr` (shift 8).
6547 __ Index(z4.VnD(), 256, -1);
6548 __ Sub(z4.VnD(), 42 * 256, z4.VnD());
6549 __ Index(z5.VnS(), 0x7878, 1);
6550 __ Sub(z5.VnS(), 0x8000, z5.VnS());
6551 __ Index(z6.VnH(), 0x30f0, -1);
6552 __ Sub(z6.VnH(), 0x7f00, z6.VnH());
6553 // B-sized lanes cannot take a shift of 8.
6554
6555 // Using a distinct destination register selects the movprfx form.
6556 __ Index(z31.VnD(), 256, 4001);
6557 __ Sub(z7.VnD(), 42 * 256, z31.VnD());
6558
6559 // Outside the encodable immediate range of `subr`, so the macro synthesises it.
6560 __ Index(z30.VnS(), 0x11223344, 1);
6561 __ Sub(z8.VnS(), 0x88776655, z30.VnS());
6562
6563 END();
6564
6565 if (CAN_RUN()) {
6566 RUN();
6567
6568 int expected_z0[] = {87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99};
6569 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
6570
6571 int expected_z1[] = {0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78};
6572 ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
6573
6574 int expected_z2[] = {0xab2c, 0xcd4e, 0xef70, 0x1192, 0x33b4, 0x55d6};
6575 ASSERT_EQUAL_SVE(expected_z2, z2.VnH());
6576
6577 int expected_z3[] = {0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a};
6578 ASSERT_EQUAL_SVE(expected_z3, z3.VnB());
6579
6580 int expected_z4[] = {10502, 10501, 10500, 10499, 10498, 10497, 10496};
6581 ASSERT_EQUAL_SVE(expected_z4, z4.VnD());
6582
6583 int expected_z5[] = {0x0783, 0x0784, 0x0785, 0x0786, 0x0787, 0x0788};
6584 ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
6585
6586 int expected_z6[] = {0x4e15, 0x4e14, 0x4e13, 0x4e12, 0x4e11, 0x4e10};
6587 ASSERT_EQUAL_SVE(expected_z6, z6.VnH());
6588
6589 int expected_z7[] = {-13510, -9509, -5508, -1507, 2494, 6495, 10496};
6590 ASSERT_EQUAL_SVE(expected_z7, z7.VnD());
6591
6592 int expected_z8[] = {0x7755330e, 0x7755330f, 0x77553310, 0x77553311};
6593 ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
6594 }
6595}
6596
6597TEST_SVE(sve_int_wide_imm_unpredicated_fdup) {
6598 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6599 START();
6600
6601 // Immediates which can be encoded in the instructions.
6602 __ Fdup(z0.VnH(), RawbitsToFloat16(0xc500));
6603 __ Fdup(z1.VnS(), Float16(2.0));
6604 __ Fdup(z2.VnD(), Float16(3.875));
6605 __ Fdup(z3.VnH(), 8.0f);
6606 __ Fdup(z4.VnS(), -4.75f);
6607 __ Fdup(z5.VnD(), 0.5f);
6608 __ Fdup(z6.VnH(), 1.0);
6609 __ Fdup(z7.VnS(), 2.125);
6610 __ Fdup(z8.VnD(), -13.0);
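  // (Encodable FP immediates have magnitudes of the form ((16 + m) / 16) * 2^e,
  //  with m in [0, 15] and e in [-3, 4], i.e. from 0.125 to 31.0.)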
6611
6612 // Immediates which cannot be encoded in the instructions.
6613 __ Fdup(z10.VnH(), Float16(0.0));
6614 __ Fdup(z11.VnH(), kFP16PositiveInfinity);
6615 __ Fdup(z12.VnS(), 255.0f);
6616 __ Fdup(z13.VnS(), kFP32NegativeInfinity);
6617 __ Fdup(z14.VnD(), 12.3456);
6618 __ Fdup(z15.VnD(), kFP64PositiveInfinity);
6619
6620 END();
6621
6622 if (CAN_RUN()) {
6623 RUN();
6624
6625 ASSERT_EQUAL_SVE(0xc500, z0.VnH());
6626 ASSERT_EQUAL_SVE(0x40000000, z1.VnS());
6627 ASSERT_EQUAL_SVE(0x400f000000000000, z2.VnD());
6628 ASSERT_EQUAL_SVE(0x4800, z3.VnH());
6629 ASSERT_EQUAL_SVE(FloatToRawbits(-4.75f), z4.VnS());
6630 ASSERT_EQUAL_SVE(DoubleToRawbits(0.5), z5.VnD());
6631 ASSERT_EQUAL_SVE(0x3c00, z6.VnH());
6632 ASSERT_EQUAL_SVE(FloatToRawbits(2.125f), z7.VnS());
6633 ASSERT_EQUAL_SVE(DoubleToRawbits(-13.0), z8.VnD());
6634
6635 ASSERT_EQUAL_SVE(0x0000, z10.VnH());
6636 ASSERT_EQUAL_SVE(Float16ToRawbits(kFP16PositiveInfinity), z11.VnH());
6637 ASSERT_EQUAL_SVE(FloatToRawbits(255.0), z12.VnS());
6638 ASSERT_EQUAL_SVE(FloatToRawbits(kFP32NegativeInfinity), z13.VnS());
6639 ASSERT_EQUAL_SVE(DoubleToRawbits(12.3456), z14.VnD());
6640 ASSERT_EQUAL_SVE(DoubleToRawbits(kFP64PositiveInfinity), z15.VnD());
6641 }
6642}
6643
TatWai Chong6f111bc2019-10-07 09:20:37 +01006644TEST_SVE(sve_andv_eorv_orv) {
6645 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6646 START();
6647
6648 uint64_t in[] = {0x8899aabbccddeeff, 0x7777555533331111, 0x123456789abcdef0};
6649 InsrHelper(&masm, z31.VnD(), in);
6650
6651 // For simplicity, we re-use the same pg for various lane sizes.
6652 // For D lanes: 1, 1, 0
6653 // For S lanes: 1, 1, 1, 0, 0
6654 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
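  // Only the lowest bit of each lane-sized group of predicate bits is
  // significant for the wider lane sizes, which is how the patterns above
  // follow from pg_in.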
6655 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6656 Initialise(&masm, p0.VnB(), pg_in);
6657
6658 // Make a copy so we can check that constructive operations preserve zn.
6659 __ Mov(z0, z31);
6660 __ Andv(b0, p0, z0.VnB()); // destructive
6661 __ Andv(h1, p0, z31.VnH());
6662 __ Mov(z2, z31);
6663 __ Andv(s2, p0, z2.VnS()); // destructive
6664 __ Andv(d3, p0, z31.VnD());
6665
6666 __ Eorv(b4, p0, z31.VnB());
6667 __ Mov(z5, z31);
6668 __ Eorv(h5, p0, z5.VnH()); // destructive
6669 __ Eorv(s6, p0, z31.VnS());
6670 __ Mov(z7, z31);
6671 __ Eorv(d7, p0, z7.VnD()); // destructive
6672
6673 __ Mov(z8, z31);
6674 __ Orv(b8, p0, z8.VnB()); // destructive
6675 __ Orv(h9, p0, z31.VnH());
6676 __ Mov(z10, z31);
6677 __ Orv(s10, p0, z10.VnS()); // destructive
6678 __ Orv(d11, p0, z31.VnD());
6679
6680 END();
6681
6682 if (CAN_RUN()) {
6683 RUN();
6684
6685 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6686 ASSERT_EQUAL_64(0x10, d0);
6687 ASSERT_EQUAL_64(0x1010, d1);
6688 ASSERT_EQUAL_64(0x33331111, d2);
6689 ASSERT_EQUAL_64(0x7777555533331111, d3);
6690 ASSERT_EQUAL_64(0xbf, d4);
6691 ASSERT_EQUAL_64(0xedcb, d5);
6692 ASSERT_EQUAL_64(0x44444444, d6);
6693 ASSERT_EQUAL_64(0x7777555533331111, d7);
6694 ASSERT_EQUAL_64(0xff, d8);
6695 ASSERT_EQUAL_64(0xffff, d9);
6696 ASSERT_EQUAL_64(0x77775555, d10);
6697 ASSERT_EQUAL_64(0x7777555533331111, d11);
6698 } else {
6699 ASSERT_EQUAL_64(0, d0);
6700 ASSERT_EQUAL_64(0x0010, d1);
6701 ASSERT_EQUAL_64(0x00110011, d2);
6702 ASSERT_EQUAL_64(0x0011001100110011, d3);
6703 ASSERT_EQUAL_64(0x62, d4);
6704 ASSERT_EQUAL_64(0x0334, d5);
6705 ASSERT_EQUAL_64(0x8899aabb, d6);
6706 ASSERT_EQUAL_64(0xffeeffeeffeeffee, d7);
6707 ASSERT_EQUAL_64(0xff, d8);
6708 ASSERT_EQUAL_64(0xffff, d9);
6709 ASSERT_EQUAL_64(0xffffffff, d10);
6710 ASSERT_EQUAL_64(0xffffffffffffffff, d11);
6711 }
6712
6713 // Check the upper lanes above the top of the V register are all clear.
6714 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6715 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6716 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6717 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6718 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6719 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6720 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6721 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6722 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6723 ASSERT_EQUAL_SVE_LANE(0, z8.VnD(), i);
6724 ASSERT_EQUAL_SVE_LANE(0, z9.VnD(), i);
6725 ASSERT_EQUAL_SVE_LANE(0, z10.VnD(), i);
6726 ASSERT_EQUAL_SVE_LANE(0, z11.VnD(), i);
6727 }
6728 }
6729}
6730
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07006731
6732TEST_SVE(sve_saddv_uaddv) {
6733 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6734 START();
6735
6736 uint64_t in[] = {0x8899aabbccddeeff, 0x8182838485868788, 0x0807060504030201};
6737 InsrHelper(&masm, z31.VnD(), in);
6738
6739 // For simplicity, we re-use the same pg for various lane sizes.
6740 // For D lanes: 1, 1, 0
6741 // For S lanes: 1, 1, 1, 0, 0
6742 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
6743 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
6744 Initialise(&masm, p0.VnB(), pg_in);
6745
6746 // Make a copy so we can check that constructive operations preserve zn.
6747 __ Mov(z0, z31);
6748 __ Saddv(b0, p0, z0.VnB()); // destructive
6749 __ Saddv(h1, p0, z31.VnH());
6750 __ Mov(z2, z31);
6751 __ Saddv(s2, p0, z2.VnS()); // destructive
6752
6753 __ Uaddv(b4, p0, z31.VnB());
6754 __ Mov(z5, z31);
6755 __ Uaddv(h5, p0, z5.VnH()); // destructive
6756 __ Uaddv(s6, p0, z31.VnS());
6757 __ Mov(z7, z31);
6758 __ Uaddv(d7, p0, z7.VnD()); // destructive
6759
6760 END();
6761
6762 if (CAN_RUN()) {
6763 RUN();
6764
6765 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6766 // Saddv
6767 ASSERT_EQUAL_64(0xfffffffffffffda9, d0);
6768 ASSERT_EQUAL_64(0xfffffffffffe9495, d1);
6769 ASSERT_EQUAL_64(0xffffffff07090b0c, d2);
6770 // Uaddv
6771 ASSERT_EQUAL_64(0x00000000000002a9, d4);
6772 ASSERT_EQUAL_64(0x0000000000019495, d5);
6773 ASSERT_EQUAL_64(0x0000000107090b0c, d6);
6774 ASSERT_EQUAL_64(0x8182838485868788, d7);
6775 } else {
6776 // Saddv
6777 ASSERT_EQUAL_64(0xfffffffffffffd62, d0);
6778 ASSERT_EQUAL_64(0xfffffffffffe8394, d1);
6779 ASSERT_EQUAL_64(0xfffffffed3e6fa0b, d2);
6780 // Uaddv
6781 ASSERT_EQUAL_64(0x0000000000000562, d4);
6782 ASSERT_EQUAL_64(0x0000000000028394, d5);
6783 ASSERT_EQUAL_64(0x00000001d3e6fa0b, d6);
6784 ASSERT_EQUAL_64(0x0a1c2e4052647687, d7);
6785 }
6786
6787 // Check the upper lanes above the top of the V register are all clear.
6788 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6789 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6790 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6791 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6792 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6793 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6794 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6795 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6796 }
6797 }
6798}
6799
6800
6801TEST_SVE(sve_sminv_uminv) {
6802 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6803 START();
6804
6805 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6806 InsrHelper(&masm, z31.VnD(), in);
6807
6808 // For simplicity, we re-use the same pg for various lane sizes.
6809 // For D lanes: 1, 0, 1
6810 // For S lanes: 1, 1, 0, 0, 1
6811 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6812 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6813 Initialise(&masm, p0.VnB(), pg_in);
6814
6815 // Make a copy so we can check that constructive operations preserve zn.
6816 __ Mov(z0, z31);
6817 __ Sminv(b0, p0, z0.VnB()); // destructive
6818 __ Sminv(h1, p0, z31.VnH());
6819 __ Mov(z2, z31);
6820 __ Sminv(s2, p0, z2.VnS()); // destructive
6821 __ Sminv(d3, p0, z31.VnD());
6822
6823 __ Uminv(b4, p0, z31.VnB());
6824 __ Mov(z5, z31);
6825 __ Uminv(h5, p0, z5.VnH()); // destructive
6826 __ Uminv(s6, p0, z31.VnS());
6827 __ Mov(z7, z31);
6828 __ Uminv(d7, p0, z7.VnD()); // destructive
6829
6830 END();
6831
6832 if (CAN_RUN()) {
6833 RUN();
6834
6835 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6836 // Sminv
6837 ASSERT_EQUAL_64(0xaa, d0);
6838 ASSERT_EQUAL_64(0xaabb, d1);
6839 ASSERT_EQUAL_64(0xaabbfc00, d2);
6840 ASSERT_EQUAL_64(0x00112233aabbfc00, d3); // The smaller lane is inactive.
6841 // Uminv
6842 ASSERT_EQUAL_64(0, d4);
6843 ASSERT_EQUAL_64(0x2233, d5);
6844 ASSERT_EQUAL_64(0x112233, d6);
6845 ASSERT_EQUAL_64(0x00112233aabbfc00, d7); // The smaller lane is inactive.
6846 } else {
6847 // Sminv
6848 ASSERT_EQUAL_64(0xaa, d0);
6849 ASSERT_EQUAL_64(0xaaaa, d1);
6850 ASSERT_EQUAL_64(0xaaaaaaaa, d2);
6851 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d3);
6852 // Uminv
6853 ASSERT_EQUAL_64(0, d4);
6854 ASSERT_EQUAL_64(0x2233, d5);
6855 ASSERT_EQUAL_64(0x112233, d6);
6856 ASSERT_EQUAL_64(0x00112233aabbfc00, d7);
6857 }
6858
6859 // Check the upper lanes above the top of the V register are all clear.
6860 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6861 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6862 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6863 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6864 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6865 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6866 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6867 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6868 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6869 }
6870 }
6871}
6872
6873TEST_SVE(sve_smaxv_umaxv) {
6874 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6875 START();
6876
6877 uint64_t in[] = {0xfffa5555aaaaaaaa, 0x0011223344aafe80, 0x00112233aabbfc00};
6878 InsrHelper(&masm, z31.VnD(), in);
6879
6880 // For simplicity, we re-use the same pg for various lane sizes.
6881 // For D lanes: 1, 0, 1
6882 // For S lanes: 1, 1, 0, 0, 1
6883 // For H lanes: 1, 1, 0, 1, 1, 0, 0, 0, 1, 1
6884 int pg_in[] = {1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1};
6885 Initialise(&masm, p0.VnB(), pg_in);
6886
6887 // Make a copy so we can check that constructive operations preserve zn.
6888 __ Mov(z0, z31);
6889 __ Smaxv(b0, p0, z0.VnB()); // destructive
6890 __ Smaxv(h1, p0, z31.VnH());
6891 __ Mov(z2, z31);
6892 __ Smaxv(s2, p0, z2.VnS()); // destructive
6893 __ Smaxv(d3, p0, z31.VnD());
6894
6895 __ Umaxv(b4, p0, z31.VnB());
6896 __ Mov(z5, z31);
6897 __ Umaxv(h5, p0, z5.VnH()); // destructive
6898 __ Umaxv(s6, p0, z31.VnS());
6899 __ Mov(z7, z31);
6900 __ Umaxv(d7, p0, z7.VnD()); // destructive
6901
6902 END();
6903
6904 if (CAN_RUN()) {
6905 RUN();
6906
6907 if (static_cast<int>(ArrayLength(pg_in)) >= config->sve_vl_in_bytes()) {
6908 // Smaxv
6909 ASSERT_EQUAL_64(0x33, d0);
6910 ASSERT_EQUAL_64(0x44aa, d1);
6911 ASSERT_EQUAL_64(0x112233, d2);
6912 ASSERT_EQUAL_64(0x112233aabbfc00, d3);
6913 // Umaxv
6914 ASSERT_EQUAL_64(0xfe, d4);
6915 ASSERT_EQUAL_64(0xfc00, d5);
6916 ASSERT_EQUAL_64(0xaabbfc00, d6);
6917 ASSERT_EQUAL_64(0x112233aabbfc00, d7);
6918 } else {
6919 // Smaxv
6920 ASSERT_EQUAL_64(0x33, d0);
6921 ASSERT_EQUAL_64(0x44aa, d1);
6922 ASSERT_EQUAL_64(0x112233, d2);
6923 ASSERT_EQUAL_64(0x00112233aabbfc00, d3);
6924 // Umaxv
6925 ASSERT_EQUAL_64(0xfe, d4);
6926 ASSERT_EQUAL_64(0xfc00, d5);
6927 ASSERT_EQUAL_64(0xaabbfc00, d6);
6928 ASSERT_EQUAL_64(0xfffa5555aaaaaaaa, d7);
6929 }
6930
6931 // Check the upper lanes above the top of the V register are all clear.
6932 for (int i = 1; i < core.GetSVELaneCount(kDRegSize); i++) {
6933 ASSERT_EQUAL_SVE_LANE(0, z0.VnD(), i);
6934 ASSERT_EQUAL_SVE_LANE(0, z1.VnD(), i);
6935 ASSERT_EQUAL_SVE_LANE(0, z2.VnD(), i);
6936 ASSERT_EQUAL_SVE_LANE(0, z3.VnD(), i);
6937 ASSERT_EQUAL_SVE_LANE(0, z4.VnD(), i);
6938 ASSERT_EQUAL_SVE_LANE(0, z5.VnD(), i);
6939 ASSERT_EQUAL_SVE_LANE(0, z6.VnD(), i);
6940 ASSERT_EQUAL_SVE_LANE(0, z7.VnD(), i);
6941 }
6942 }
6943}
6944
TatWai Chong4d2a4e92019-10-23 16:19:32 -07006945typedef void (MacroAssembler::*SdotUdotFn)(const ZRegister& zd,
6946 const ZRegister& za,
6947 const ZRegister& zn,
6948 const ZRegister& zm);
6949
6950template <typename Td, typename Ts, typename Te>
6951static void SdotUdotHelper(Test* config,
6952 SdotUdotFn macro,
6953 unsigned lane_size_in_bits,
6954 const Td& zd_inputs,
6955 const Td& za_inputs,
6956 const Ts& zn_inputs,
6957 const Ts& zm_inputs,
6958 const Te& zd_expected,
6959 const Te& zdnm_expected) {
6960 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
6961 START();
6962
6963 ZRegister zd = z0.WithLaneSize(lane_size_in_bits);
6964 ZRegister za = z1.WithLaneSize(lane_size_in_bits);
6965 ZRegister zn = z2.WithLaneSize(lane_size_in_bits / 4);
6966 ZRegister zm = z3.WithLaneSize(lane_size_in_bits / 4);
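  // The source lanes are a quarter of the width of the destination lanes:
  // sdot/udot accumulate the dot product of four narrow elements into each
  // wide lane.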
6967
6968 InsrHelper(&masm, zd, zd_inputs);
6969 InsrHelper(&masm, za, za_inputs);
6970 InsrHelper(&masm, zn, zn_inputs);
6971 InsrHelper(&masm, zm, zm_inputs);
6972
6973 // The Dot macro handles arbitrarily-aliased registers in the argument list.
6974 ZRegister da_result = z10.WithLaneSize(lane_size_in_bits);
6975 ZRegister dn_result = z11.WithLaneSize(lane_size_in_bits);
6976 ZRegister dm_result = z12.WithLaneSize(lane_size_in_bits);
6977 ZRegister dnm_result = z13.WithLaneSize(lane_size_in_bits);
6978 ZRegister d_result = z14.WithLaneSize(lane_size_in_bits);
6979
6980 __ Mov(da_result, za);
6981 // zda = zda + (zn . zm)
6982 (masm.*macro)(da_result, da_result, zn, zm);
6983
6984 __ Mov(dn_result, zn);
6985 // zdn = za + (zdn . zm)
Jacob Bramley378fc892019-10-30 11:26:09 +00006986 (masm.*macro)(dn_result, za, dn_result.WithSameLaneSizeAs(zn), zm);
TatWai Chong4d2a4e92019-10-23 16:19:32 -07006987
6988 __ Mov(dm_result, zm);
6989 // zdm = za + (zn . zdm)
Jacob Bramley378fc892019-10-30 11:26:09 +00006990 (masm.*macro)(dm_result, za, zn, dm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07006991
6992 __ Mov(d_result, zd);
6993 // zd = za + (zn . zm)
6994 (masm.*macro)(d_result, za, zn, zm);
6995
6996 __ Mov(dnm_result, zn);
6997 // zdnm = za + (zdnm . zdnm)
Jacob Bramley378fc892019-10-30 11:26:09 +00006998 (masm.*macro)(dnm_result,
6999 za,
7000 dnm_result.WithSameLaneSizeAs(zn),
7001 dnm_result.WithSameLaneSizeAs(zm));
TatWai Chong4d2a4e92019-10-23 16:19:32 -07007002
7003 END();
7004
7005 if (CAN_RUN()) {
7006 RUN();
7007
7008 ASSERT_EQUAL_SVE(za_inputs, z1.WithLaneSize(lane_size_in_bits));
7009 ASSERT_EQUAL_SVE(zn_inputs, z2.WithLaneSize(lane_size_in_bits / 4));
7010 ASSERT_EQUAL_SVE(zm_inputs, z3.WithLaneSize(lane_size_in_bits / 4));
7011
7012 ASSERT_EQUAL_SVE(zd_expected, da_result);
7013 ASSERT_EQUAL_SVE(zd_expected, dn_result);
7014 ASSERT_EQUAL_SVE(zd_expected, dm_result);
7015 ASSERT_EQUAL_SVE(zd_expected, d_result);
7016
7017 ASSERT_EQUAL_SVE(zdnm_expected, dnm_result);
7018 }
7019}
7020
7021TEST_SVE(sve_sdot) {
7022 int zd_inputs[] = {0x33, 0xee, 0xff};
7023 int za_inputs[] = {INT32_MAX, -3, 2};
7024 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
7025 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
7026
7027 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
7028 int32_t zd_expected_s[] = {-2147418113, -183, 133}; // 0x8000ffff
7029 int64_t zd_expected_d[] = {2147549183, -183, 133}; // 0x8000ffff
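  // For example, the middle lane: -3 + (9 * -19) + (-1 * 15) + (1 * 6) + (30 * 0) = -183.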
7030
7031 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
7032 int32_t zdnm_expected_s[] = {-2147418113, 980, 572};
7033 int64_t zdnm_expected_d[] = {2147549183, 980, 572};
7034
7035 SdotUdotHelper(config,
7036 &MacroAssembler::Sdot,
7037 kSRegSize,
7038 zd_inputs,
7039 za_inputs,
7040 zn_inputs,
7041 zm_inputs,
7042 zd_expected_s,
7043 zdnm_expected_s);
7044 SdotUdotHelper(config,
7045 &MacroAssembler::Sdot,
7046 kDRegSize,
7047 zd_inputs,
7048 za_inputs,
7049 zn_inputs,
7050 zm_inputs,
7051 zd_expected_d,
7052 zdnm_expected_d);
7053}
7054
7055TEST_SVE(sve_udot) {
7056 int zd_inputs[] = {0x33, 0xee, 0xff};
7057 int za_inputs[] = {INT32_MAX, -3, 2};
7058 int zn_inputs[] = {-128, -128, -128, -128, 9, -1, 1, 30, -5, -20, 9, 8};
7059 int zm_inputs[] = {-128, -128, -128, -128, -19, 15, 6, 0, 9, -5, 4, 5};
7060
7061 // zd_expected[] = za_inputs[] + (zn_inputs[] . zm_inputs[])
7062 uint32_t zd_expected_s[] = {0x8000ffff, 0x00001749, 0x0000f085};
7063 uint64_t zd_expected_d[] = {0x000000047c00ffff,
7064 0x000000000017ff49,
7065 0x00000000fff00085};
7066
7067 // zdnm_expected[] = za_inputs[] + (zn_inputs[] . zn_inputs[])
7068 uint32_t zdnm_expected_s[] = {0x8000ffff, 0x000101d4, 0x0001d03c};
7069 uint64_t zdnm_expected_d[] = {0x000000047c00ffff,
7070 0x00000000fffe03d4,
7071 0x00000001ffce023c};
7072
7073 SdotUdotHelper(config,
7074 &MacroAssembler::Udot,
7075 kSRegSize,
7076 zd_inputs,
7077 za_inputs,
7078 zn_inputs,
7079 zm_inputs,
7080 zd_expected_s,
7081 zdnm_expected_s);
7082 SdotUdotHelper(config,
7083 &MacroAssembler::Udot,
7084 kDRegSize,
7085 zd_inputs,
7086 za_inputs,
7087 zn_inputs,
7088 zm_inputs,
7089 zd_expected_d,
7090 zdnm_expected_d);
7091}
7092
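// Convert each FP input to its raw bit pattern at the requested lane size, so
// that the values can be loaded into Z registers with InsrHelper.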
TatWai Chong7a0d3672019-10-23 17:35:18 -07007093template <typename T, size_t N>
7094static void FPToRawbitsWithSize(const T (&inputs)[N],
7095 uint64_t* outputs,
7096 unsigned size_in_bits) {
TatWai Chongfe536042019-10-23 16:34:11 -07007097 for (size_t i = 0; i < N; i++) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07007098 outputs[i] = vixl::FPToRawbitsWithSize(size_in_bits, inputs[i]);
TatWai Chongfe536042019-10-23 16:34:11 -07007099 }
7100}
7101
TatWai Chong7a0d3672019-10-23 17:35:18 -07007102template <typename Ti, typename Te, size_t N>
7103static void FPBinArithHelper(Test* config,
7104 ArithFn macro,
7105 int lane_size_in_bits,
7106 const Ti (&zn_inputs)[N],
7107 const Ti (&zm_inputs)[N],
7108 const Te (&zd_expected)[N]) {
TatWai Chongfe536042019-10-23 16:34:11 -07007109 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
TatWai Chong7a0d3672019-10-23 17:35:18 -07007110
TatWai Chongfe536042019-10-23 16:34:11 -07007111 START();
7112
7113 ZRegister zd = z29.WithLaneSize(lane_size_in_bits);
7114 ZRegister zn = z30.WithLaneSize(lane_size_in_bits);
7115 ZRegister zm = z31.WithLaneSize(lane_size_in_bits);
7116
7117 uint64_t zn_rawbits[N];
7118 uint64_t zm_rawbits[N];
7119
TatWai Chong7a0d3672019-10-23 17:35:18 -07007120 FPToRawbitsWithSize(zn_inputs, zn_rawbits, lane_size_in_bits);
7121 FPToRawbitsWithSize(zm_inputs, zm_rawbits, lane_size_in_bits);
TatWai Chongfe536042019-10-23 16:34:11 -07007122
7123 InsrHelper(&masm, zn, zn_rawbits);
7124 InsrHelper(&masm, zm, zm_rawbits);
7125
7126 (masm.*macro)(zd, zn, zm);
7127
7128 END();
7129
7130 if (CAN_RUN()) {
7131 RUN();
7132
7133 ASSERT_EQUAL_SVE(zd_expected, zd);
7134 }
7135}
7136
7137TEST_SVE(sve_fp_arithmetic_unpredicated_fadd) {
7138 double zn_inputs[] = {24.0,
7139 5.5,
7140 0.0,
7141 3.875,
7142 2.125,
7143 kFP64PositiveInfinity,
7144 kFP64NegativeInfinity};
7145
7146 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7147
TatWai Chong7a0d3672019-10-23 17:35:18 -07007148 ArithFn fn = &MacroAssembler::Fadd;
TatWai Chongfe536042019-10-23 16:34:11 -07007149
7150 uint16_t expected_h[] = {Float16ToRawbits(Float16(1048.0)),
7151 Float16ToRawbits(Float16(2053.5)),
7152 Float16ToRawbits(Float16(0.1)),
7153 Float16ToRawbits(Float16(-0.875)),
7154 Float16ToRawbits(Float16(14.465)),
7155 Float16ToRawbits(kFP16PositiveInfinity),
7156 Float16ToRawbits(kFP16NegativeInfinity)};
7157
TatWai Chong7a0d3672019-10-23 17:35:18 -07007158 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07007159
7160 uint32_t expected_s[] = {FloatToRawbits(1048.0f),
7161 FloatToRawbits(2053.5f),
7162 FloatToRawbits(0.1f),
7163 FloatToRawbits(-0.875f),
7164 FloatToRawbits(14.465f),
7165 FloatToRawbits(kFP32PositiveInfinity),
7166 FloatToRawbits(kFP32NegativeInfinity)};
7167
TatWai Chong7a0d3672019-10-23 17:35:18 -07007168 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07007169
7170 uint64_t expected_d[] = {DoubleToRawbits(1048.0),
7171 DoubleToRawbits(2053.5),
7172 DoubleToRawbits(0.1),
7173 DoubleToRawbits(-0.875),
7174 DoubleToRawbits(14.465),
7175 DoubleToRawbits(kFP64PositiveInfinity),
7176 DoubleToRawbits(kFP64NegativeInfinity)};
7177
TatWai Chong7a0d3672019-10-23 17:35:18 -07007178 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07007179}
7180
7181TEST_SVE(sve_fp_arithmetic_unpredicated_fsub) {
7182 double zn_inputs[] = {24.0,
7183 5.5,
7184 0.0,
7185 3.875,
7186 2.125,
7187 kFP64PositiveInfinity,
7188 kFP64NegativeInfinity};
7189
7190 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7191
TatWai Chong7a0d3672019-10-23 17:35:18 -07007192 ArithFn fn = &MacroAssembler::Fsub;
TatWai Chongfe536042019-10-23 16:34:11 -07007193
7194 uint16_t expected_h[] = {Float16ToRawbits(Float16(-1000.0)),
7195 Float16ToRawbits(Float16(-2042.5)),
7196 Float16ToRawbits(Float16(-0.1)),
7197 Float16ToRawbits(Float16(8.625)),
7198 Float16ToRawbits(Float16(-10.215)),
7199 Float16ToRawbits(kFP16PositiveInfinity),
7200 Float16ToRawbits(kFP16NegativeInfinity)};
7201
TatWai Chong7a0d3672019-10-23 17:35:18 -07007202 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07007203
7204 uint32_t expected_s[] = {FloatToRawbits(-1000.0),
7205 FloatToRawbits(-2042.5),
7206 FloatToRawbits(-0.1),
7207 FloatToRawbits(8.625),
7208 FloatToRawbits(-10.215),
7209 FloatToRawbits(kFP32PositiveInfinity),
7210 FloatToRawbits(kFP32NegativeInfinity)};
7211
TatWai Chong7a0d3672019-10-23 17:35:18 -07007212 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07007213
7214 uint64_t expected_d[] = {DoubleToRawbits(-1000.0),
7215 DoubleToRawbits(-2042.5),
7216 DoubleToRawbits(-0.1),
7217 DoubleToRawbits(8.625),
7218 DoubleToRawbits(-10.215),
7219 DoubleToRawbits(kFP64PositiveInfinity),
7220 DoubleToRawbits(kFP64NegativeInfinity)};
7221
TatWai Chong7a0d3672019-10-23 17:35:18 -07007222 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07007223}
7224
7225TEST_SVE(sve_fp_arithmetic_unpredicated_fmul) {
7226 double zn_inputs[] = {24.0,
7227 5.5,
7228 0.0,
7229 3.875,
7230 2.125,
7231 kFP64PositiveInfinity,
7232 kFP64NegativeInfinity};
7233
7234 double zm_inputs[] = {1024.0, 2048.0, 0.1, -4.75, 12.34, 255.0, -13.0};
7235
TatWai Chong7a0d3672019-10-23 17:35:18 -07007236 ArithFn fn = &MacroAssembler::Fmul;
TatWai Chongfe536042019-10-23 16:34:11 -07007237
7238 uint16_t expected_h[] = {Float16ToRawbits(Float16(24576.0)),
7239 Float16ToRawbits(Float16(11264.0)),
7240 Float16ToRawbits(Float16(0.0)),
7241 Float16ToRawbits(Float16(-18.4)),
7242 Float16ToRawbits(Float16(26.23)),
7243 Float16ToRawbits(kFP16PositiveInfinity),
7244 Float16ToRawbits(kFP16PositiveInfinity)};
7245
TatWai Chong7a0d3672019-10-23 17:35:18 -07007246 FPBinArithHelper(config, fn, kHRegSize, zn_inputs, zm_inputs, expected_h);
TatWai Chongfe536042019-10-23 16:34:11 -07007247
7248 uint32_t expected_s[] = {FloatToRawbits(24576.0),
7249 FloatToRawbits(11264.0),
7250 FloatToRawbits(0.0),
7251 FloatToRawbits(-18.40625),
7252 FloatToRawbits(26.2225),
7253 FloatToRawbits(kFP32PositiveInfinity),
7254 FloatToRawbits(kFP32PositiveInfinity)};
7255
TatWai Chong7a0d3672019-10-23 17:35:18 -07007256 FPBinArithHelper(config, fn, kSRegSize, zn_inputs, zm_inputs, expected_s);
TatWai Chongfe536042019-10-23 16:34:11 -07007257
7258 uint64_t expected_d[] = {DoubleToRawbits(24576.0),
7259 DoubleToRawbits(11264.0),
7260 DoubleToRawbits(0.0),
7261 DoubleToRawbits(-18.40625),
7262 DoubleToRawbits(26.2225),
7263 DoubleToRawbits(kFP64PositiveInfinity),
7264 DoubleToRawbits(kFP64PositiveInfinity)};
7265
TatWai Chong7a0d3672019-10-23 17:35:18 -07007266 FPBinArithHelper(config, fn, kDRegSize, zn_inputs, zm_inputs, expected_d);
TatWai Chongfe536042019-10-23 16:34:11 -07007267}
7268
TatWai Chong7a0d3672019-10-23 17:35:18 -07007269typedef void (MacroAssembler::*FPArithPredicatedFn)(
7270 const ZRegister& zd,
7271 const PRegisterM& pg,
7272 const ZRegister& zn,
7273 const ZRegister& zm,
7274 FPMacroNaNPropagationOption nan_option);
7275
7276template <typename Ti, typename Te, size_t N>
7277static void FPBinArithHelper(
7278 Test* config,
7279 FPArithPredicatedFn macro,
7280 unsigned lane_size_in_bits,
7281 const Ti (&zd_inputs)[N],
7282 const int (&pg_inputs)[N],
7283 const Ti (&zn_inputs)[N],
7284 const Ti (&zm_inputs)[N],
7285 const Te (&zd_expected)[N],
7286 FPMacroNaNPropagationOption nan_option = FastNaNPropagation) {
TatWai Chongd316c5e2019-10-16 12:22:10 -07007287 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7288 START();
7289
TatWai Chong7a0d3672019-10-23 17:35:18 -07007290 // Avoid choosing default scratch registers.
7291 ZRegister zd = z26.WithLaneSize(lane_size_in_bits);
7292 ZRegister zn = z27.WithLaneSize(lane_size_in_bits);
7293 ZRegister zm = z28.WithLaneSize(lane_size_in_bits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007294
TatWai Chong7a0d3672019-10-23 17:35:18 -07007295 uint64_t zn_inputs_rawbits[N];
7296 uint64_t zm_inputs_rawbits[N];
7297 uint64_t zd_inputs_rawbits[N];
TatWai Chongd316c5e2019-10-16 12:22:10 -07007298
TatWai Chong7a0d3672019-10-23 17:35:18 -07007299 FPToRawbitsWithSize(zn_inputs, zn_inputs_rawbits, lane_size_in_bits);
7300 FPToRawbitsWithSize(zm_inputs, zm_inputs_rawbits, lane_size_in_bits);
7301 FPToRawbitsWithSize(zd_inputs, zd_inputs_rawbits, lane_size_in_bits);
7302
7303 InsrHelper(&masm, zn, zn_inputs_rawbits);
7304 InsrHelper(&masm, zm, zm_inputs_rawbits);
7305 InsrHelper(&masm, zd, zd_inputs_rawbits);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007306
7307 PRegisterWithLaneSize pg = p0.WithLaneSize(lane_size_in_bits);
7308 Initialise(&masm, pg, pg_inputs);
7309
7310 // `instr` zdn, pg, zdn, zm
7311 ZRegister dn_result = z0.WithLaneSize(lane_size_in_bits);
7312 __ Mov(dn_result, zn);
TatWai Chong7a0d3672019-10-23 17:35:18 -07007313 (masm.*macro)(dn_result, pg.Merging(), dn_result, zm, nan_option);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007314
7315 // If zd and zm are aliased, the instruction macro (`Instr`) swaps the order of
7316 // the operands when the operation is commutative; otherwise it falls back on
7317 // the reversed form of `Instr`, such as fdivr.
7318 // `instr` zdm, pg, zn, zdm
7319 ZRegister dm_result = z1.WithLaneSize(lane_size_in_bits);
7320 __ Mov(dm_result, zm);
TatWai Chong7a0d3672019-10-23 17:35:18 -07007321 (masm.*macro)(dm_result, pg.Merging(), zn, dm_result, nan_option);
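  // For Fdiv, for example, the operands cannot simply be swapped, so this is
  // expected to become something like `fdivr zdm, pg/m, zdm, zn`, which
  // computes zn / zdm.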
TatWai Chongd316c5e2019-10-16 12:22:10 -07007322
7323 // The macro of instructions (`Instr`) automatically selects between `instr`
7324 // and movprfx + `instr` based on whether zd and zn registers are aliased.
7325 // A generated movprfx instruction is predicated, using the same
7326 // governing predicate register. In order to keep the result constant,
7327 // initialize the destination register first.
7328 // `instr` zd, pg, zn, zm
7329 ZRegister d_result = z2.WithLaneSize(lane_size_in_bits);
7330 __ Mov(d_result, zd);
TatWai Chong7a0d3672019-10-23 17:35:18 -07007331 (masm.*macro)(d_result, pg.Merging(), zn, zm, nan_option);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007332
7333 END();
7334
7335 if (CAN_RUN()) {
7336 RUN();
7337
7338 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
7339 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
7340 if (!core.HasSVELane(dn_result, lane)) break;
7341 if ((pg_inputs[i] & 1) != 0) {
7342 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dn_result, lane);
7343 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07007344 ASSERT_EQUAL_SVE_LANE(zn_inputs_rawbits[i], dn_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007345 }
7346 }
7347
7348 for (size_t i = 0; i < ArrayLength(zd_expected); i++) {
7349 int lane = static_cast<int>(ArrayLength(zd_expected) - i - 1);
7350 if (!core.HasSVELane(dm_result, lane)) break;
7351 if ((pg_inputs[i] & 1) != 0) {
7352 ASSERT_EQUAL_SVE_LANE(zd_expected[i], dm_result, lane);
7353 } else {
TatWai Chong7a0d3672019-10-23 17:35:18 -07007354 ASSERT_EQUAL_SVE_LANE(zm_inputs_rawbits[i], dm_result, lane);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007355 }
7356 }
7357
7358 ASSERT_EQUAL_SVE(zd_expected, d_result);
7359 }
7360}
7361
7362TEST_SVE(sve_binary_arithmetic_predicated_fdiv) {
TatWai Chong7a0d3672019-10-23 17:35:18 -07007363 // The same inputs are shared across the tests for each precision.
TatWai Chongd316c5e2019-10-16 12:22:10 -07007364 double zd_in[] = {0.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
7365
7366 double zn_in[] = {24.0,
7367 24.0,
7368 -2.0,
7369 -2.0,
7370 5.5,
7371 5.5,
7372 kFP64PositiveInfinity,
7373 kFP64PositiveInfinity,
7374 kFP64NegativeInfinity,
7375 kFP64NegativeInfinity};
7376
7377 double zm_in[] = {-2.0, -2.0, 24.0, 24.0, 0.5, 0.5, 0.65, 0.65, 24.0, 24.0};
7378
TatWai Chongd316c5e2019-10-16 12:22:10 -07007379 int pg_in[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
7380
TatWai Chong7a0d3672019-10-23 17:35:18 -07007381 uint16_t exp_h[] = {Float16ToRawbits(Float16(0.1)),
TatWai Chongd316c5e2019-10-16 12:22:10 -07007382 Float16ToRawbits(Float16(-12.0)),
7383 Float16ToRawbits(Float16(2.2)),
7384 Float16ToRawbits(Float16(-0.0833)),
7385 Float16ToRawbits(Float16(4.4)),
7386 Float16ToRawbits(Float16(11.0)),
7387 Float16ToRawbits(Float16(6.6)),
7388 Float16ToRawbits(kFP16PositiveInfinity),
7389 Float16ToRawbits(Float16(8.8)),
7390 Float16ToRawbits(kFP16NegativeInfinity)};
7391
TatWai Chong7a0d3672019-10-23 17:35:18 -07007392 FPBinArithHelper(config,
7393 &MacroAssembler::Fdiv,
7394 kHRegSize,
7395 zd_in,
7396 pg_in,
7397 zn_in,
7398 zm_in,
7399 exp_h);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007400
7401 uint32_t exp_s[] = {FloatToRawbits(0.1),
7402 FloatToRawbits(-12.0),
7403 FloatToRawbits(2.2),
7404 0xbdaaaaab,
7405 FloatToRawbits(4.4),
7406 FloatToRawbits(11.0),
7407 FloatToRawbits(6.6),
7408 FloatToRawbits(kFP32PositiveInfinity),
7409 FloatToRawbits(8.8),
7410 FloatToRawbits(kFP32NegativeInfinity)};
7411
TatWai Chong7a0d3672019-10-23 17:35:18 -07007412 FPBinArithHelper(config,
7413 &MacroAssembler::Fdiv,
7414 kSRegSize,
7415 zd_in,
7416 pg_in,
7417 zn_in,
7418 zm_in,
7419 exp_s);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007420
7421 uint64_t exp_d[] = {DoubleToRawbits(0.1),
7422 DoubleToRawbits(-12.0),
7423 DoubleToRawbits(2.2),
7424 0xbfb5555555555555,
7425 DoubleToRawbits(4.4),
7426 DoubleToRawbits(11.0),
7427 DoubleToRawbits(6.6),
7428 DoubleToRawbits(kFP64PositiveInfinity),
7429 DoubleToRawbits(8.8),
7430 DoubleToRawbits(kFP64NegativeInfinity)};
7431
TatWai Chong7a0d3672019-10-23 17:35:18 -07007432 FPBinArithHelper(config,
7433 &MacroAssembler::Fdiv,
7434 kDRegSize,
7435 zd_in,
7436 pg_in,
7437 zn_in,
7438 zm_in,
7439 exp_d);
TatWai Chongd316c5e2019-10-16 12:22:10 -07007440}
7441
Martyn Capewell9cc3f142019-10-29 14:06:35 +00007442TEST_SVE(sve_select) {
7443 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
7444 START();
7445
7446 uint64_t in0[] = {0x01f203f405f607f8, 0xfefcf8f0e1c3870f, 0x123456789abcdef0};
7447 uint64_t in1[] = {0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa};
7448
7449 // For simplicity, we re-use the same pg for various lane sizes.
7450 // For D lanes: 1, 1, 0
7451 // For S lanes: 1, 1, 1, 0, 0
7452 // For H lanes: 0, 1, 0, 1, 1, 1, 0, 0, 1, 0
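  // (Only the predicate bit corresponding to the lowest-numbered byte of each
  // H, S or D element is significant, which is how the per-lane-size views
  // above are derived from the byte-granular pg_in below.)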
7453 int pg_in[] = {1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0};
7454 Initialise(&masm, p0.VnB(), pg_in);
7455 PRegisterM pg = p0.Merging();
7456
7457 InsrHelper(&masm, z30.VnD(), in0);
7458 InsrHelper(&masm, z31.VnD(), in1);
7459
7460 __ Sel(z0.VnB(), pg, z30.VnB(), z31.VnB());
7461 __ Sel(z1.VnH(), pg, z30.VnH(), z31.VnH());
7462 __ Sel(z2.VnS(), pg, z30.VnS(), z31.VnS());
7463 __ Sel(z3.VnD(), pg, z30.VnD(), z31.VnD());
7464
7465 END();
7466
7467 if (CAN_RUN()) {
7468 RUN();
7469
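    // Worked example for the B-lane result: in the lowest doubleword of z30
    // the bytes are {f0, de, bc, 9a, 78, 56, 34, 12} (lane 0 first) and the
    // corresponding pg_in values are {0, 1, 1, 1, 0, 1, 0, 0}, so active lanes
    // take the z30 byte and inactive lanes take 0xaa from z31, giving
    // 0xaaaa56aa9abcdeaa.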
7470 uint64_t expected_z0[] = {0xaaaaaaaa05aa07f8,
7471 0xfeaaaaf0aac3870f,
7472 0xaaaa56aa9abcdeaa};
7473 ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
7474
7475 uint64_t expected_z1[] = {0xaaaaaaaaaaaa07f8,
7476 0xaaaaf8f0e1c3870f,
7477 0xaaaaaaaa9abcaaaa};
7478 ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
7479
7480 uint64_t expected_z2[] = {0xaaaaaaaa05f607f8,
7481 0xfefcf8f0e1c3870f,
7482 0xaaaaaaaaaaaaaaaa};
7483 ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
7484
7485 uint64_t expected_z3[] = {0x01f203f405f607f8,
7486 0xfefcf8f0e1c3870f,
7487 0xaaaaaaaaaaaaaaaa};
7488 ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
7489 }
7490}
TatWai Chongd316c5e2019-10-16 12:22:10 -07007491
TatWai Chong7a0d3672019-10-23 17:35:18 -07007492// Execute a number of instructions which all use ProcessNaNs, and check that
7493// they all propagate NaNs correctly.
7494template <typename Ti, typename Te, size_t N>
7495static void ProcessNaNsHelper(Test* config,
7496 int lane_size_in_bits,
7497 const Ti (&zn_inputs)[N],
7498 const Ti (&zm_inputs)[N],
7499 const Te (&zd_expected)[N],
7500 FPMacroNaNPropagationOption nan_option) {
7501 ArithFn unpredicated_macro[] = {&MacroAssembler::Fadd,
7502 &MacroAssembler::Fsub,
7503 &MacroAssembler::Fmul};
7504
7505 for (size_t i = 0; i < ArrayLength(unpredicated_macro); i++) {
7506 FPBinArithHelper(config,
7507 unpredicated_macro[i],
7508 lane_size_in_bits,
7509 zn_inputs,
7510 zm_inputs,
7511 zd_expected);
7512 }
7513
7514 FPArithPredicatedFn predicated_macro[] = {&MacroAssembler::Fdiv,
7515 &MacroAssembler::Fmax,
7516 &MacroAssembler::Fmin};
7517 int pg_inputs[N];
7518 // With an all-true predicate, this helper aims to compare with special
7519 // numbers.
7520 for (size_t i = 0; i < N; i++) {
7521 pg_inputs[i] = 1;
7522 }
7523
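  // For example, an Fmax of a signalling NaN and a quiet NaN is expected to
  // return the quietened signalling NaN, exactly as the unpredicated forms do.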
7524 for (size_t i = 0; i < ArrayLength(predicated_macro); i++) {
7525 FPBinArithHelper(config,
7526 predicated_macro[i],
7527 lane_size_in_bits,
7528 // With an all-true predicate, the value in zd is
7529 // irrelevant to the operations.
7530 zn_inputs,
7531 pg_inputs,
7532 zn_inputs,
7533 zm_inputs,
7534 zd_expected,
7535 nan_option);
7536 }
7537}
7538
7539TEST_SVE(sve_process_nans_double) {
7540 // Use non-standard NaNs to check that the payload bits are preserved.
7541 double sn = RawbitsToDouble(0x7ff5555511111111);
7542 double sm = RawbitsToDouble(0x7ff5555522222222);
7543 double qn = RawbitsToDouble(0x7ffaaaaa11111111);
7544 double qm = RawbitsToDouble(0x7ffaaaaa22222222);
7545 VIXL_ASSERT(IsSignallingNaN(sn));
7546 VIXL_ASSERT(IsSignallingNaN(sm));
7547 VIXL_ASSERT(IsQuietNaN(qn));
7548 VIXL_ASSERT(IsQuietNaN(qm));
7549
7550 // The input NaNs after passing through ProcessNaN.
7551 uint64_t sn_proc = 0x7ffd555511111111;
7552 uint64_t sm_proc = 0x7ffd555522222222;
7553 uint64_t qn_proc = DoubleToRawbits(qn);
7554 uint64_t qm_proc = DoubleToRawbits(qm);
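  // ProcessNaN is expected to quieten a signalling NaN by setting the most
  // significant fraction bit (bit 51 for double precision), so
  // 0x7ff5555511111111 becomes 0x7ffd555511111111, while quiet NaNs (and
  // their payloads) pass through unchanged.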
7555
7556 // Quiet NaNs are propagated.
7557 double zn_inputs_1[] = {qn, 0.0, 0.0, qm, qn, qm};
7558 double zm_inputs_1[] = {0.0, qn, qm, 0.0, qm, qn};
7559 uint64_t zd_expected_1[] =
7560 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
7561 ProcessNaNsHelper(config,
7562 kDRegSize,
7563 zn_inputs_1,
7564 zm_inputs_1,
7565 zd_expected_1,
7566 StrictNaNPropagation);
7567
7568 // Signalling NaNs are propagated.
7569 double zn_inputs_2[] = {sn, 0.0, 0.0, sm, sn, sm};
7570 double zm_inputs_2[] = {0.0, sn, sm, 0.0, sm, sn};
7571 uint64_t zd_expected_2[] =
7572 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
7573 ProcessNaNsHelper(config,
7574 kDRegSize,
7575 zn_inputs_2,
7576 zm_inputs_2,
7577 zd_expected_2,
7578 StrictNaNPropagation);
7579
7580 // Signalling NaNs take precedence over quiet NaNs.
7581 double zn_inputs_3[] = {sn, qn, sn, sn, qn};
7582 double zm_inputs_3[] = {qm, sm, sm, qn, sn};
7583 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
7584 ProcessNaNsHelper(config,
7585 kDRegSize,
7586 zn_inputs_3,
7587 zm_inputs_3,
7588 zd_expected_3,
7589 StrictNaNPropagation);
7590}
7591
7592TEST_SVE(sve_process_nans_float) {
7593 // Use non-standard NaNs to check that the payload bits are preserved.
7594 float sn = RawbitsToFloat(0x7f951111);
7595 float sm = RawbitsToFloat(0x7f952222);
7596 float qn = RawbitsToFloat(0x7fea1111);
7597 float qm = RawbitsToFloat(0x7fea2222);
7598 VIXL_ASSERT(IsSignallingNaN(sn));
7599 VIXL_ASSERT(IsSignallingNaN(sm));
7600 VIXL_ASSERT(IsQuietNaN(qn));
7601 VIXL_ASSERT(IsQuietNaN(qm));
7602
7603 // The input NaNs after passing through ProcessNaN.
7604 uint32_t sn_proc = 0x7fd51111;
7605 uint32_t sm_proc = 0x7fd52222;
7606 uint32_t qn_proc = FloatToRawbits(qn);
7607 uint32_t qm_proc = FloatToRawbits(qm);
7608
7609 // Quiet NaNs are propagated.
7610 float zn_inputs_1[] = {qn, 0.0f, 0.0f, qm, qn, qm};
7611 float zm_inputs_1[] = {0.0f, qn, qm, 0.0f, qm, qn};
7612 uint64_t zd_expected_1[] =
7613 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
7614 ProcessNaNsHelper(config,
7615 kSRegSize,
7616 zn_inputs_1,
7617 zm_inputs_1,
7618 zd_expected_1,
7619 StrictNaNPropagation);
7620
7621 // Signalling NaNs are propagated.
7622 float zn_inputs_2[] = {sn, 0.0f, 0.0f, sm, sn, sm};
7623 float zm_inputs_2[] = {0.0f, sn, sm, 0.0f, sm, sn};
7624 uint64_t zd_expected_2[] =
7625 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
7626 ProcessNaNsHelper(config,
7627 kSRegSize,
7628 zn_inputs_2,
7629 zm_inputs_2,
7630 zd_expected_2,
7631 StrictNaNPropagation);
7632
7633 // Signalling NaNs take precedence over quiet NaNs.
7634 float zn_inputs_3[] = {sn, qn, sn, sn, qn};
7635 float zm_inputs_3[] = {qm, sm, sm, qn, sn};
7636 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
7637 ProcessNaNsHelper(config,
7638 kSRegSize,
7639 zn_inputs_3,
7640 zm_inputs_3,
7641 zd_expected_3,
7642 StrictNaNPropagation);
7643}
7644
7645TEST_SVE(sve_process_nans_half) {
7646 // Use non-standard NaNs to check that the payload bits are preserved.
7647 Float16 sn(RawbitsToFloat16(0x7c11));
7648 Float16 sm(RawbitsToFloat16(0xfc22));
7649 Float16 qn(RawbitsToFloat16(0x7e33));
7650 Float16 qm(RawbitsToFloat16(0xfe44));
7651 VIXL_ASSERT(IsSignallingNaN(sn));
7652 VIXL_ASSERT(IsSignallingNaN(sm));
7653 VIXL_ASSERT(IsQuietNaN(qn));
7654 VIXL_ASSERT(IsQuietNaN(qm));
7655
7656 // The input NaNs after passing through ProcessNaN.
7657 uint16_t sn_proc = 0x7e11;
7658 uint16_t sm_proc = 0xfe22;
7659 uint16_t qn_proc = Float16ToRawbits(qn);
7660 uint16_t qm_proc = Float16ToRawbits(qm);
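  // As in the double- and single-precision tests, the quiet bit (bit 9 for
  // half precision) is expected to be set, with the sign and payload
  // preserved: 0x7c11 -> 0x7e11 and 0xfc22 -> 0xfe22.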
7661
7662 // Quiet NaNs are propagated.
7663 Float16 zn_inputs_1[] = {qn, Float16(0.0), Float16(0.0), qm, qn, qm};
7664 Float16 zm_inputs_1[] = {Float16(0.0), qn, qm, Float16(0.0), qm, qn};
7665 uint64_t zd_expected_1[] =
7666 {qn_proc, qn_proc, qm_proc, qm_proc, qn_proc, qm_proc};
7667 ProcessNaNsHelper(config,
7668 kHRegSize,
7669 zn_inputs_1,
7670 zm_inputs_1,
7671 zd_expected_1,
7672 StrictNaNPropagation);
7673
7674 // Signalling NaNs are propagated.
7675 Float16 zn_inputs_2[] = {sn, Float16(0.0), Float16(0.0), sm, sn, sm};
7676 Float16 zm_inputs_2[] = {Float16(0.0), sn, sm, Float16(0.0), sm, sn};
7677 uint64_t zd_expected_2[] =
7678 {sn_proc, sn_proc, sm_proc, sm_proc, sn_proc, sm_proc};
7679 ProcessNaNsHelper(config,
7680 kHRegSize,
7681 zn_inputs_2,
7682 zm_inputs_2,
7683 zd_expected_2,
7684 StrictNaNPropagation);
7685
7686 // Signalling NaNs take precedence over quiet NaNs.
7687 Float16 zn_inputs_3[] = {sn, qn, sn, sn, qn};
7688 Float16 zm_inputs_3[] = {qm, sm, sm, qn, sn};
7689 uint64_t zd_expected_3[] = {sn_proc, sm_proc, sn_proc, sn_proc, sn_proc};
7690 ProcessNaNsHelper(config,
7691 kHRegSize,
7692 zn_inputs_3,
7693 zm_inputs_3,
7694 zd_expected_3,
7695 StrictNaNPropagation);
7696}
7697
7698TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_h) {
7699 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
7700 double zn_inputs[] = {-2.1,
7701 8.5,
7702 225.5,
7703 0.0,
7704 8.8,
7705 -4.75,
7706 kFP64PositiveInfinity,
7707 kFP64NegativeInfinity};
7708 double zm_inputs[] = {-2.0,
7709 -13.0,
7710 24.0,
7711 0.01,
7712 0.5,
7713 300.75,
7714 kFP64NegativeInfinity,
7715 kFP64PositiveInfinity};
7716 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
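  // Inactive lanes (where pg_inputs is 0) are expected to keep their zd_inputs
  // values (3.3 and 5.5); active lanes take the larger (Fmax) or smaller
  // (Fmin) of zn_inputs and zm_inputs, e.g. fmax(-2.1, -2.0) = -2.0 and
  // fmax(+Inf, -Inf) = +Inf.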
7717
7718 uint16_t zd_expected_max[] = {Float16ToRawbits(Float16(-2.0)),
7719 Float16ToRawbits(Float16(8.5)),
7720 Float16ToRawbits(Float16(3.3)),
7721 Float16ToRawbits(Float16(0.01)),
7722 Float16ToRawbits(Float16(5.5)),
7723 Float16ToRawbits(Float16(300.75)),
7724 Float16ToRawbits(kFP16PositiveInfinity),
7725 Float16ToRawbits(kFP16PositiveInfinity)};
7726 FPBinArithHelper(config,
7727 &MacroAssembler::Fmax,
7728 kHRegSize,
7729 zd_inputs,
7730 pg_inputs,
7731 zn_inputs,
7732 zm_inputs,
7733 zd_expected_max);
7734
7735 uint16_t zd_expected_min[] = {Float16ToRawbits(Float16(-2.1)),
7736 Float16ToRawbits(Float16(-13.0)),
7737 Float16ToRawbits(Float16(3.3)),
7738 Float16ToRawbits(Float16(0.0)),
7739 Float16ToRawbits(Float16(5.5)),
7740 Float16ToRawbits(Float16(-4.75)),
7741 Float16ToRawbits(kFP16NegativeInfinity),
7742 Float16ToRawbits(kFP16NegativeInfinity)};
7743 FPBinArithHelper(config,
7744 &MacroAssembler::Fmin,
7745 kHRegSize,
7746 zd_inputs,
7747 pg_inputs,
7748 zn_inputs,
7749 zm_inputs,
7750 zd_expected_min);
7751}
7752
7753TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_s) {
7754 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
7755 double zn_inputs[] = {-2.1,
7756 8.5,
7757 225.5,
7758 0.0,
7759 8.8,
7760 -4.75,
7761 kFP64PositiveInfinity,
7762 kFP64NegativeInfinity};
7763 double zm_inputs[] = {-2.0,
7764 -13.0,
7765 24.0,
7766 0.01,
7767 0.5,
7768 300.75,
7769 kFP64NegativeInfinity,
7770 kFP64PositiveInfinity};
7771 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
7772
7773 uint32_t zd_expected_max[] = {FloatToRawbits(-2.0),
7774 FloatToRawbits(8.5),
7775 FloatToRawbits(3.3),
7776 FloatToRawbits(0.01),
7777 FloatToRawbits(5.5),
7778 FloatToRawbits(300.75),
7779 FloatToRawbits(kFP32PositiveInfinity),
7780 FloatToRawbits(kFP32PositiveInfinity)};
7781 FPBinArithHelper(config,
7782 &MacroAssembler::Fmax,
7783 kSRegSize,
7784 zd_inputs,
7785 pg_inputs,
7786 zn_inputs,
7787 zm_inputs,
7788 zd_expected_max);
7789
7790 uint32_t zd_expected_min[] = {FloatToRawbits(-2.1),
7791 FloatToRawbits(-13.0),
7792 FloatToRawbits(3.3),
7793 FloatToRawbits(0.0),
7794 FloatToRawbits(5.5),
7795 FloatToRawbits(-4.75),
7796 FloatToRawbits(kFP32NegativeInfinity),
7797 FloatToRawbits(kFP32NegativeInfinity)};
7798 FPBinArithHelper(config,
7799 &MacroAssembler::Fmin,
7800 kSRegSize,
7801 zd_inputs,
7802 pg_inputs,
7803 zn_inputs,
7804 zm_inputs,
7805 zd_expected_min);
7806}
7807
7808TEST_SVE(sve_binary_arithmetic_predicated_fmax_fmin_d) {
7809 double zd_inputs[] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
7810 double zn_inputs[] = {-2.1,
7811 8.5,
7812 225.5,
7813 0.0,
7814 8.8,
7815 -4.75,
7816 kFP64PositiveInfinity,
7817 kFP64NegativeInfinity};
7818 double zm_inputs[] = {-2.0,
7819 -13.0,
7820 24.0,
7821 0.01,
7822 0.5,
7823 300.75,
7824 kFP64NegativeInfinity,
7825 kFP64PositiveInfinity};
7826 int pg_inputs[] = {1, 1, 0, 1, 0, 1, 1, 1};
7827
7828 uint64_t zd_expected_max[] = {DoubleToRawbits(-2.0),
7829 DoubleToRawbits(8.5),
7830 DoubleToRawbits(3.3),
7831 DoubleToRawbits(0.01),
7832 DoubleToRawbits(5.5),
7833 DoubleToRawbits(300.75),
7834 DoubleToRawbits(kFP64PositiveInfinity),
7835 DoubleToRawbits(kFP64PositiveInfinity)};
7836 FPBinArithHelper(config,
7837 &MacroAssembler::Fmax,
7838 kDRegSize,
7839 zd_inputs,
7840 pg_inputs,
7841 zn_inputs,
7842 zm_inputs,
7843 zd_expected_max);
7844
7845 uint64_t zd_expected_min[] = {DoubleToRawbits(-2.1),
7846 DoubleToRawbits(-13.0),
7847 DoubleToRawbits(3.3),
7848 DoubleToRawbits(0.0),
7849 DoubleToRawbits(5.5),
7850 DoubleToRawbits(-4.75),
7851 DoubleToRawbits(kFP64NegativeInfinity),
7852 DoubleToRawbits(kFP64NegativeInfinity)};
7853 FPBinArithHelper(config,
7854 &MacroAssembler::Fmin,
7855 kDRegSize,
7856 zd_inputs,
7857 pg_inputs,
7858 zn_inputs,
7859 zm_inputs,
7860 zd_expected_min);
7861}
Jacob Bramleyd77a8e42019-02-12 16:52:24 +00007862} // namespace aarch64
7863} // namespace vixl